diff --git a/refact-agent/engine/Cargo.toml b/refact-agent/engine/Cargo.toml
index c1a6191e2..bc16b8ca2 100644
--- a/refact-agent/engine/Cargo.toml
+++ b/refact-agent/engine/Cargo.toml
@@ -59,7 +59,6 @@ rust-embed = "8.5.0"
 percent-encoding = "2.3"
 serde = { version = "1", features = ["rc", "derive"] }
 serde_cbor = "0.11.2"
-serde-inline-default = "0.2.3"
 serde_json = { version = "1", features = ["preserve_order"] }
 serde_yaml = "0.9.31"
 # all features = ["compression", "docs", "event_log", "failpoints", "io_uring", "lock_free_delays", "measure_allocs", "miri_optimizations", "mutex", "no_inline", "no_logs", "pretty_backtrace", "testing"]
diff --git a/refact-agent/engine/bring_your_own_key/hf.yaml b/refact-agent/engine/bring_your_own_key/hf.yaml
deleted file mode 100644
index 68a85453f..000000000
--- a/refact-agent/engine/bring_your_own_key/hf.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-cloud_name: HuggingFace API
-
-completion_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-completion_endpoint_style: "hf"
-completion_model: bigcode/starcoder2-3b
-completion_apikey: "$HF_TOKEN"
-
-chat_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-chat_endpoint_style: "hf"
-chat_apikey: "$HF_TOKEN"
-chat_model: meta-llama/Llama-2-70b-chat-hf
-
-tokenizer_rewrite_path:    # because you need to agree to licensing agreement in the official repo to even download a tokenizer
-    meta-llama/Llama-2-70b-chat-hf: TheBloke/Llama-2-70B-fp16
-
-embedding_endpoint: "https://api-inference.huggingface.co/pipeline/feature-extraction/$MODEL"
-embedding_endpoint_style: "hf"
-embedding_apikey: "$HF_TOKEN"
-embedding_model: thenlper/gte-base
-embedding_size: 768
-#embedding_batch: 64
diff --git a/refact-agent/engine/bring_your_own_key/mixed.yaml b/refact-agent/engine/bring_your_own_key/mixed.yaml
deleted file mode 100644
index f28d8424c..000000000
--- a/refact-agent/engine/bring_your_own_key/mixed.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-cloud_name: Mixed API
-
-chat_endpoint: "https://openrouter.ai/api/v1/chat/completions"
-chat_apikey: "$OPENROUTER_API_KEY"
-chat_model: meta-llama/llama-3.1-8b-instruct
-
-completion_endpoint_style: "hf"
-completion_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-completion_model: bigcode/starcoder2-3b
-completion_apikey: "$HF_TOKEN"
-
-embedding_endpoint: "https://api.openai.com/v1/embeddings"
-embedding_apikey: "$OPENAI_API_KEY"
-embedding_default_model: text-embedding-3-small
-embedding_size: 1536
-
-tokenizer_rewrite_path:
-    meta-llama/llama-3.1-8b-instruct: unsloth/llama-3-8b-bnb-4bit
-
-
diff --git a/refact-agent/engine/bring_your_own_key/openai.yaml b/refact-agent/engine/bring_your_own_key/openai.yaml
deleted file mode 100644
index 9cd41f51a..000000000
--- a/refact-agent/engine/bring_your_own_key/openai.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-cloud_name: OpenAI API
-
-chat_endpoint: "https://api.openai.com/v1/chat/completions"
-chat_apikey: "sk-..."    # or use $OPENAI_API_KEY if you have it in global environment variables
-chat_model: gpt-4o-mini
-
-embedding_endpoint: "https://api.openai.com/v1/embeddings"
-embedding_apikey: "sk-..."
-embedding_model: text-embedding-3-small
-embedding_size: 1536
-
-# no code completion though :/
-
-
-running_models:
-  - gpt-4o
diff --git a/refact-agent/engine/bring_your_own_key/openrouter.yaml b/refact-agent/engine/bring_your_own_key/openrouter.yaml
deleted file mode 100644
index 912a97f38..000000000
--- a/refact-agent/engine/bring_your_own_key/openrouter.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-cloud_name: OpenRouter API
-
-chat_endpoint: "https://openrouter.ai/api/v1/chat/completions"
-chat_apikey: "$OPENROUTER_API_KEY"
-chat_model: meta-llama/llama-3.1-8b-instruct
-tokenizer_rewrite_path:
-    meta-llama/llama-3.1-8b-instruct: unsloth/llama-3-8b-bnb-4bit
-
-running_models:
-  - gpt-4o
-  - meta-llama/llama-3.1-8b-instruct
-
-# no code completion though :/
diff --git a/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml b/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml
deleted file mode 100644
index 28681d4cc..000000000
--- a/refact-agent/engine/bring_your_own_key/refact_self_hosting.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-cloud_name: Refact local self-hosting server
-
-# Under development
-#chat_endpoint: "http://localhost:8008/v1/chat/completions"
-#chat_model: "qwen2.5/coder/1.5b/instruct"
-
-embedding_endpoint: "http://localhost:8008/v1/embeddings"
-embedding_model: "thenlper/gte-base"
-
-completion_endpoint: "http://localhost:8008/v1/completions"
-completion_model: "Refact/1.6B"
diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
index 720a4a1a2..5ea5d435b 100644
--- a/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
+++ b/refact-agent/engine/python_binding_and_cmdline/refact/cli_main.py
@@ -54,9 +54,9 @@ async def answer_question_in_arguments(settings, arg_question):
 async def welcome_message(settings: cli_settings.CmdlineArgs, tip: str):
     text = f"""
 ~/.cache/refact/cli.yaml -- set up this program
-~/.cache/refact/bring-your-own-key.yaml -- set up models you want to use
-~/.cache/refact/integrations.d/* -- set up github, jira, make, gdb, and other tools, including which actions require confirmation
-~/.cache/refact/privacy.yaml -- which files should never leave your computer
+~/.config/refact/providers.d/*.yaml -- set up model providers you want to use
+~/.config/refact/integrations.d/* -- set up github, jira, make, gdb, and other tools, including which actions require confirmation
+~/.config/refact/privacy.yaml -- which files should never leave your computer
 Project: {settings.project_path}
 To exit, type 'exit' or Ctrl+D. {tip}.
""" @@ -345,8 +345,8 @@ async def actual_chat( app = Application(key_bindings=kb, layout=layout) app.editing_mode = cli_settings.cli_yaml.get_editing_mode() - if cli_settings.args.model not in caps.code_chat_models: - known_models = list(caps.code_chat_models.keys()) + if cli_settings.args.model not in caps.chat_models: + known_models = list(caps.chat_models.keys()) print(f"model {cli_settings.args.model} is unknown, pick one of {known_models}") return diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py index 7dcbe8c5c..71d110238 100644 --- a/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py +++ b/refact-agent/engine/python_binding_and_cmdline/refact/cli_settings.py @@ -8,15 +8,12 @@ class CapsModel(BaseModel): n_ctx: int - similar_models: List[str] supports_tools: bool class Caps(BaseModel): - cloud_name: str - code_chat_models: Dict[str, CapsModel] - code_chat_default_model: str - embedding_model: str + chat_models: Dict[str, CapsModel] + chat_default_model: str class SettingsCLI(BaseModel): @@ -40,9 +37,7 @@ def get_editing_mode(self): default_config = """ -# The caps file is bring-your-own-key.yaml by default, that in turn works with OPENAI_API_KEY inside by default. -# But you can change it to: -#address_url: Refact +address_url: Refact #api_key: #address_url: http://your-self-hosting-server/ #api_key: your-secret-key @@ -66,14 +61,14 @@ def get_editing_mode(self): class CmdlineArgs: def __init__(self, caps: Caps, *, model: str, path_to_project: str, always_pause: bool, chat_id: str, chat_remote: bool): self.caps = caps - self.model = model or caps.code_chat_default_model + self.model = model or caps.chat_default_model self.project_path = path_to_project self.always_pause = always_pause self.chat_id = chat_id self.chat_remote = chat_remote def n_ctx(self): - return self.caps.code_chat_models[self.model].n_ctx + return self.caps.chat_models[self.model].n_ctx args: Optional[CmdlineArgs] = None diff --git a/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py b/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py index 4ca448f8c..18d0553cf 100644 --- a/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py +++ b/refact-agent/engine/python_binding_and_cmdline/refact/cli_streaming.py @@ -109,7 +109,7 @@ def process_streaming_data(data: Dict[str, Any], deltas_collector: Optional[chat assert deltas_collector.choices[0].tool_calls is not None streaming_toolcall = list(deltas_collector.choices[0].tool_calls) update_entertainment_box() - finish_reason = choices[0]['finish_reason'] + finish_reason = choices[0].get('finish_reason') if finish_reason == "stop": print_response("\n") if finish_reason == "tool_calls": diff --git a/refact-agent/engine/src/agentic/compress_trajectory.rs b/refact-agent/engine/src/agentic/compress_trajectory.rs index c93332589..e5dd5b93d 100644 --- a/refact-agent/engine/src/agentic/compress_trajectory.rs +++ b/refact-agent/engine/src/agentic/compress_trajectory.rs @@ -89,16 +89,15 @@ pub async fn compress_trajectory( if messages.is_empty() { return Err("The provided chat is empty".to_string()); } - let (model_name, n_ctx) = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + let (model_id, n_ctx) = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { Ok(caps) => { - let caps_locked = caps.read().unwrap(); - let model_name = caps_locked.code_chat_default_model.clone(); - 
if let Some(model_rec) = caps_locked.code_completion_models.get(&strip_model_from_finetune(&model_name)) { - Ok((model_name, model_rec.n_ctx)) + let model_id = caps.defaults.chat_default_model.clone(); + if let Some(model_rec) = caps.completion_models.get(&strip_model_from_finetune(&model_id)) { + Ok((model_id, model_rec.base.n_ctx)) } else { Err(format!( - "Model '{}' not found. Server has these models: {:?}", - model_name, caps_locked.code_completion_models.keys() + "Model '{}' not found, server has these models: {:?}", + model_id, caps.completion_models.keys() )) } }, @@ -120,12 +119,12 @@ pub async fn compress_trajectory( messages_compress.clone(), "".to_string(), false, - model_name.clone(), + model_id.clone(), ).await)); let tools = gather_used_tools(&messages); let new_messages = subchat_single( ccx.clone(), - model_name.as_str(), + &model_id, messages_compress, Some(tools), None, diff --git a/refact-agent/engine/src/agentic/generate_commit_message.rs b/refact-agent/engine/src/agentic/generate_commit_message.rs index 5cd7062a9..bb469559f 100644 --- a/refact-agent/engine/src/agentic/generate_commit_message.rs +++ b/refact-agent/engine/src/agentic/generate_commit_message.rs @@ -265,11 +265,8 @@ pub async fn generate_commit_message_by_diff( }, ] }; - let model_name = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { - Ok(caps) => caps - .read() - .map(|x| Ok(x.code_chat_default_model.clone())) - .map_err(|_| "Caps are not available".to_string())?, + let model_id = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => Ok(caps.defaults.chat_default_model.clone()), Err(_) => Err("No caps available".to_string()), }?; let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( @@ -280,11 +277,11 @@ pub async fn generate_commit_message_by_diff( messages.clone(), "".to_string(), false, - model_name.clone(), + model_id.clone(), ).await)); let new_messages = subchat_single( ccx.clone(), - model_name.as_str(), + &model_id, messages, Some(vec![]), None, diff --git a/refact-agent/engine/src/agentic/generate_follow_up_message.rs b/refact-agent/engine/src/agentic/generate_follow_up_message.rs index fae6e0aeb..e6faca196 100644 --- a/refact-agent/engine/src/agentic/generate_follow_up_message.rs +++ b/refact-agent/engine/src/agentic/generate_follow_up_message.rs @@ -74,8 +74,7 @@ fn _make_conversation( pub async fn generate_follow_up_message( messages: Vec, gcx: Arc>, - light_model_name: String, - current_model_name: &String, + model_id: &str, chat_id: &str, ) -> Result { let ccx = Arc::new(AMutex::new(AtCommandsContext::new( @@ -86,11 +85,11 @@ pub async fn generate_follow_up_message( messages.clone(), chat_id.to_string(), false, - current_model_name.clone(), + model_id.to_string(), ).await)); let updated_messages: Vec> = subchat_single( ccx.clone(), - &light_model_name, + model_id, _make_conversation(&messages), Some(vec![]), None, diff --git a/refact-agent/engine/src/ast/chunk_utils.rs b/refact-agent/engine/src/ast/chunk_utils.rs index fcc73d84b..569880bf3 100644 --- a/refact-agent/engine/src/ast/chunk_utils.rs +++ b/refact-agent/engine/src/ast/chunk_utils.rs @@ -1,13 +1,13 @@ use std::collections::VecDeque; use std::path::PathBuf; use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use itertools::Itertools; use ropey::Rope; use tokenizers::Tokenizer; -use crate::ast::count_tokens; +use crate::tokens::count_text_tokens; +use crate::tokens::count_text_tokens_with_fallback; use crate::vecdb::vdb_structs::SplitResult; @@ -17,9 +17,8 @@ pub fn 
official_text_hashing_function(s: &str) -> String { } -fn split_line_if_needed(line: &str, tokenizer: Option>>, tokens_limit: usize) -> Vec { +fn split_line_if_needed(line: &str, tokenizer: Option>, tokens_limit: usize) -> Vec { if let Some(tokenizer) = tokenizer { - let tokenizer = tokenizer.read().unwrap(); tokenizer.encode(line, false).map_or_else( |_| split_without_tokenizer(line, tokens_limit), |tokens| { @@ -39,7 +38,7 @@ fn split_line_if_needed(line: &str, tokenizer: Option>> } fn split_without_tokenizer(line: &str, tokens_limit: usize) -> Vec { - if count_tokens(None, line) <= tokens_limit { + if count_text_tokens(None, line).is_ok_and(|tokens| tokens <= tokens_limit) { vec![line.to_string()] } else { Rope::from_str(line).chars() @@ -54,7 +53,7 @@ pub fn get_chunks(text: &String, file_path: &PathBuf, symbol_path: &String, top_bottom_rows: (usize, usize), // case with top comments - tokenizer: Option>>, + tokenizer: Option>, tokens_limit: usize, intersection_lines: usize, use_symbol_range_always: bool, // use for skeleton case @@ -70,7 +69,7 @@ pub fn get_chunks(text: &String, let mut previous_start = line_idx; while line_idx < lines.len() { let line = lines[line_idx]; - let line_tok_n = count_tokens(tokenizer.clone(), line); + let line_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line); if !accum.is_empty() && current_tok_n + line_tok_n > tokens_limit { let current_line = accum.iter().map(|(line, _)| line).join("\n"); @@ -105,7 +104,7 @@ pub fn get_chunks(text: &String, current_tok_n = 0; while line_idx >= 0 { let line = lines[line_idx as usize]; - let text_orig_tok_n = count_tokens(tokenizer.clone(), line); + let text_orig_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line); if !accum.is_empty() && current_tok_n + text_orig_tok_n > tokens_limit { let current_line = accum.iter().map(|(line, _)| line).join("\n"); let start_line = if use_symbol_range_always { top_row as u64 } else { accum.front().unwrap().1 as u64 }; @@ -153,10 +152,10 @@ pub fn get_chunks(text: &String, mod tests { use std::path::PathBuf; use std::str::FromStr; - use std::sync::{Arc, RwLock as StdRwLock}; + use std::sync::Arc; use crate::ast::chunk_utils::get_chunks; - use crate::ast::count_tokens; + use crate::tokens::count_text_tokens; // use crate::vecdb::vdb_structs::SplitResult; const DUMMY_TOKENIZER: &str = include_str!("dummy_tokenizer.json"); @@ -174,15 +173,15 @@ mod tests { #[test] fn dummy_tokenizer_test() { - let tokenizer = Arc::new(StdRwLock::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap())); - let text_orig_tok_n = count_tokens(Some(tokenizer.clone()), PYTHON_CODE); + let tokenizer = Arc::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap()); + let text_orig_tok_n = count_text_tokens(Some(tokenizer.clone()), PYTHON_CODE).unwrap(); assert_eq!(text_orig_tok_n, PYTHON_CODE.len()); } #[test] fn simple_chunk_test_1_with_128_limit() { - let tokenizer = Arc::new(StdRwLock::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap())); - let orig = include_str!("../caps.rs").to_string(); + let tokenizer = Some(Arc::new(tokenizers::Tokenizer::from_str(DUMMY_TOKENIZER).unwrap())); + let orig = include_str!("../caps/mod.rs").to_string(); let token_limits = [10, 50, 100, 200, 300]; for &token_limit in &token_limits { let chunks = get_chunks( @@ -190,7 +189,7 @@ mod tests { &PathBuf::from_str("/tmp/test.py").unwrap(), &"".to_string(), (0, 10), - Some(tokenizer.clone()), + tokenizer.clone(), token_limit, 2, false); let mut not_present: Vec = 
orig.chars().collect(); let mut result = String::new(); diff --git a/refact-agent/engine/src/ast/file_splitter.rs b/refact-agent/engine/src/ast/file_splitter.rs index 044edccf9..ab5e28a44 100644 --- a/refact-agent/engine/src/ast/file_splitter.rs +++ b/refact-agent/engine/src/ast/file_splitter.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; use std::sync::Arc; use itertools::Itertools; +use tokenizers::Tokenizer; use tokio::sync::RwLock; -use std::sync::RwLock as StdRwLock; use uuid::Uuid; use crate::ast::treesitter::parsers::get_ast_parser_by_filename; @@ -30,7 +30,7 @@ impl AstBasedFileSplitter { pub async fn vectorization_split( &self, doc: &Document, - tokenizer: Option>>, + tokenizer: Option>, gcx: Arc>, tokens_limit: usize, ) -> Result, String> { diff --git a/refact-agent/engine/src/ast/mod.rs b/refact-agent/engine/src/ast/mod.rs index 852c51be1..e00095ad4 100644 --- a/refact-agent/engine/src/ast/mod.rs +++ b/refact-agent/engine/src/ast/mod.rs @@ -1,7 +1,5 @@ use std::collections::HashMap; use std::sync::Arc; -#[cfg(feature="vecdb")] -use std::sync::RwLock as StdRwLock; use std::cell::RefCell; use uuid::Uuid; use crate::files_in_workspace::Document; @@ -23,27 +21,6 @@ pub mod chunk_utils; pub mod parse_python; pub mod parse_common; - -#[cfg(feature="vecdb")] -pub fn count_tokens( - tokenizer: Option>>, - text: &str, -) -> usize { - if let Some(tokenizer) = tokenizer { - let tokenizer_locked = tokenizer.write().unwrap(); - let tokens = match tokenizer_locked.encode(text, false) { - Ok(tokens) => tokens, - Err(err) => { - tracing::warn!("Encoding error: {}", err); - return 0; - } - }; - tokens.len() - } else { - 1 + text.len() / 3 - } -} - pub fn lowlevel_file_markup( doc: &Document, symbols: &Vec, diff --git a/refact-agent/engine/src/at_commands/at_search.rs b/refact-agent/engine/src/at_commands/at_search.rs index 067d6faf0..6f576b69e 100644 --- a/refact-agent/engine/src/at_commands/at_search.rs +++ b/refact-agent/engine/src/at_commands/at_search.rs @@ -7,7 +7,6 @@ use crate::nicer_logs::last_n_chars; use crate::at_commands::execute_at::AtCommandMember; use crate::call_validation::{ContextEnum, ContextFile}; -use crate::caps::get_custom_embedding_api_key; use crate::vecdb; use crate::vecdb::vdb_structs::VecdbSearch; @@ -69,18 +68,12 @@ pub async fn execute_at_search( (ccx_locked.global_context.clone(), ccx_locked.top_n) }; - let api_key = get_custom_embedding_api_key(gcx.clone()).await; - if let Err(err) = api_key { - return Err(err.message); - } - let api_key = api_key.unwrap(); - let vec_db = gcx.read().await.vec_db.clone(); let r = match *vec_db.lock().await { Some(ref db) => { let top_n_twice_as_big = top_n * 2; // top_n will be cut at postprocessing stage, and we really care about top_n files, not pieces // TODO: this code sucks, release lock, don't hold anything during the search - let search_result = db.vecdb_search(query.clone(), top_n_twice_as_big, vecdb_scope_filter_mb, &api_key).await?; + let search_result = db.vecdb_search(query.clone(), top_n_twice_as_big, vecdb_scope_filter_mb).await?; let results = search_result.results.clone(); return Ok(results2message(&results)); } diff --git a/refact-agent/engine/src/at_commands/execute_at.rs b/refact-agent/engine/src/at_commands/execute_at.rs index 53511d9d6..f91b9387d 100644 --- a/refact-agent/engine/src/at_commands/execute_at.rs +++ b/refact-agent/engine/src/at_commands/execute_at.rs @@ -1,4 +1,4 @@ -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use tokio::sync::Mutex as AMutex; use regex::Regex; use serde_json::{json, 
Value}; @@ -20,7 +20,7 @@ pub const MIN_RAG_CONTEXT_LIMIT: usize = 256; pub async fn run_at_commands_locally( ccx: Arc>, - tokenizer: Arc>, + tokenizer: Option>, maxgen: usize, original_messages: &Vec, stream_back_to_user: &mut HasRagResults, @@ -164,7 +164,7 @@ pub async fn run_at_commands_locally( pub async fn run_at_commands_remotely( ccx: Arc>, - model_name: &str, + model_id: &str, maxgen: usize, original_messages: &Vec, stream_back_to_user: &mut HasRagResults, @@ -186,7 +186,7 @@ pub async fn run_at_commands_remotely( maxgen, subchat_tool_parameters, postprocess_parameters, - model_name: model_name.to_string(), + model_name: model_id.to_string(), chat_id: chat_id.clone(), }; diff --git a/refact-agent/engine/src/cached_tokenizers.rs b/refact-agent/engine/src/cached_tokenizers.rs deleted file mode 100644 index 10e477396..000000000 --- a/refact-agent/engine/src/cached_tokenizers.rs +++ /dev/null @@ -1,163 +0,0 @@ -use tokio::io::AsyncWriteExt; -use std::path::Path; -use std::sync::{Arc, RwLock as StdRwLock}; -use std::time::Duration; -use tokio::sync::RwLock as ARwLock; -use tokio::sync::Mutex as AMutex; -use tokenizers::Tokenizer; -use reqwest::header::AUTHORIZATION; -use reqwest::Response; -use tracing::{error, info}; -use uuid::Uuid; - -use crate::global_context::GlobalContext; -use crate::caps::{CodeAssistantCaps, strip_model_from_finetune}; - - -async fn try_open_tokenizer( - res: Response, - to: impl AsRef, -) -> Result<(), String> { - let mut file = tokio::fs::OpenOptions::new() - .write(true) - .create(true) - .open(&to) - .await - .map_err(|e| format!("failed to open file: {}", e))?; - file.write_all(&res.bytes().await - .map_err(|e| format!("failed to fetch bytes: {}", e))? - ).await.map_err(|e| format!("failed to write to file: {}", e))?; - file.flush().await.map_err(|e| format!("failed to flush file: {}", e))?; - info!("saved tokenizer to {}", to.as_ref().display()); - Ok(()) -} - -async fn download_tokenizer_file( - http_client: &reqwest::Client, - http_path: &str, - api_token: String, - to: impl AsRef, -) -> Result<(), String> { - tokio::fs::create_dir_all( - to.as_ref().parent().ok_or_else(|| "tokenizer path has no parent")?, - ).await.map_err(|e| format!("failed to create parent dir: {}", e))?; - if to.as_ref().exists() { - return Ok(()); - } - - info!("downloading tokenizer from {}", http_path); - let mut req = http_client.get(http_path); - if api_token.to_lowercase().starts_with("hf_") { - req = req.header(AUTHORIZATION, format!("Bearer {api_token}")) - } - let res = req - .send() - .await - .map_err(|e| format!("failed to get response: {}", e))? 
-        .error_for_status()
-        .map_err(|e| format!("failed to get response: {}", e))?;
-    try_open_tokenizer(res, to).await?;
-    Ok(())
-}
-
-fn check_json_file(path: &Path) -> bool {
-    match Tokenizer::from_file(path) {
-        Ok(_) => { true }
-        Err(_) => { false }
-    }
-}
-
-async fn try_download_tokenizer_file_and_open(
-    http_client: &reqwest::Client,
-    http_path: &str,
-    api_token: String,
-    to: impl AsRef<Path>,
-) -> Result<(), String> {
-    let path = to.as_ref();
-    if path.exists() && check_json_file(path) {
-        return Ok(());
-    }
-
-    let tmp_file = std::env::temp_dir().join(Uuid::new_v4().to_string());
-    let tmp_path = tmp_file.as_path();
-
-    for i in 0..15 {
-        if i != 0 {
-            tokio::time::sleep(Duration::from_millis(200)).await;
-        }
-        let res = download_tokenizer_file(http_client, http_path, api_token.clone(), tmp_path).await;
-        if res.is_err() {
-            error!("failed to download tokenizer: {}", res.unwrap_err());
-            continue;
-        }
-
-        let parent = path.parent();
-        if parent.is_none() {
-            error!("failed to download tokenizer: parent is not set");
-            continue;
-        }
-
-        let res = tokio::fs::create_dir_all(parent.unwrap()).await;
-        if res.is_err() {
-            error!("failed to create parent dir: {}", res.unwrap_err());
-            continue;
-        }
-
-        if !check_json_file(tmp_path) {
-            error!("failed to download tokenizer: file is not a tokenizer");
-            continue;
-        }
-
-        match tokio::fs::copy(tmp_path, path).await {
-            Ok(_) => {
-                info!("moved tokenizer to {}", path.display());
-                return Ok(());
-            },
-            Err(_) => { continue; }
-        }
-    }
-    Err("failed to download tokenizer".to_string())
-}
-
-pub async fn cached_tokenizer(
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    global_context: Arc<ARwLock<GlobalContext>>,
-    model_name: String,
-) -> Result<Arc<StdRwLock<Tokenizer>>, String> {
-    let model_name = strip_model_from_finetune(&model_name);
-    let tokenizer_download_lock: Arc<AMutex<bool>> = global_context.read().await.tokenizer_download_lock.clone();
-    let _tokenizer_download_locked = tokenizer_download_lock.lock().await;
-
-    let (client2, cache_dir, tokenizer_arc, api_key) = {
-        let cx_locked = global_context.read().await;
-        (cx_locked.http_client.clone(), cx_locked.cache_dir.clone(), cx_locked.tokenizer_map.clone().get(&model_name).cloned(), cx_locked.cmdline.api_key.clone())
-    };
-
-    if tokenizer_arc.is_some() {
-        return Ok(tokenizer_arc.unwrap().clone())
-    }
-
-    let tokenizer_cache_dir = std::path::PathBuf::from(cache_dir).join("tokenizers");
-    tokio::fs::create_dir_all(&tokenizer_cache_dir)
-        .await
-        .expect("failed to create cache dir");
-    let to = tokenizer_cache_dir.join(model_name.clone()).join("tokenizer.json");
-    let http_path = {
-        let caps_locked = caps.read().unwrap();
-        if caps_locked.tokenizer_path_template.is_empty() {
-            caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name).clone()
-        } else {
-            let rewritten_model_name = caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name);
-            caps_locked.tokenizer_path_template.replace("$MODEL", rewritten_model_name)
-        }
-    };
-    try_download_tokenizer_file_and_open(&client2, http_path.as_str(), api_key.clone(), &to).await?;
-    info!("loading tokenizer \"{}\"", to.display());
-    let mut tokenizer = Tokenizer::from_file(to).map_err(|e| format!("failed to load tokenizer: {}", e))?;
-    let _ = tokenizer.with_truncation(None);
-    tokenizer.with_padding(None);
-    let arc = Arc::new(StdRwLock::new(tokenizer));
-
-    global_context.write().await.tokenizer_map.insert(model_name.clone(), arc.clone());
-    Ok(arc)
-}
diff --git a/refact-agent/engine/src/call_validation.rs b/refact-agent/engine/src/call_validation.rs
index eeeca8ff0..67262783b 100644
--- a/refact-agent/engine/src/call_validation.rs
+++ b/refact-agent/engine/src/call_validation.rs
@@ -63,8 +63,6 @@ pub struct CodeCompletionPost {
     #[serde(default)]
     pub model: String,
     #[serde(default)]
-    pub scratchpad: String,
-    #[serde(default)]
     pub stream: bool,
     #[serde(default)]
     pub no_cache: bool,
@@ -184,8 +182,33 @@ pub struct ChatMessage {
     pub thinking_blocks: Option<Vec<serde_json::Value>>,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
+#[serde(rename_all = "lowercase")]
+pub enum ModelType {
+    Chat,
+    Completion,
+    Embedding,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ChatModelType {
+    Light,
+    Default,
+    Thinking
+}
+
+impl Default for ChatModelType {
+    fn default() -> Self {
+        ChatModelType::Default
+    }
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct SubchatParameters {
+    #[serde(default)]
+    pub subchat_model_type: ChatModelType,
+    #[serde(default)]
     pub subchat_model: String,
     pub subchat_n_ctx: usize,
     #[serde(default)]
@@ -205,8 +228,6 @@ pub struct ChatPost {
     pub parameters: SamplingParameters,
     #[serde(default)]
     pub model: String,
-    #[serde(default)]
-    pub scratchpad: String,
     pub stream: Option<bool>,
     pub temperature: Option<f32>,
     #[serde(default)]
@@ -361,7 +382,6 @@ mod tests {
             ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
@@ -392,7 +412,6 @@ mod tests {
            ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
@@ -423,7 +442,6 @@ mod tests {
            ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
@@ -454,7 +472,6 @@ mod tests {
            ..Default::default()
         },
         model: "".to_string(),
-        scratchpad: "".to_string(),
         stream: false,
         no_cache: false,
         use_ast: true,
diff --git a/refact-agent/engine/src/caps.rs b/refact-agent/engine/src/caps.rs
deleted file mode 100644
index 28026a9e6..000000000
--- a/refact-agent/engine/src/caps.rs
+++ /dev/null
@@ -1,715 +0,0 @@
-use std::path::PathBuf;
-use std::collections::HashMap;
-use indexmap::IndexMap;
-use std::fs::File;
-use std::io::Read;
-use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
-use serde::Deserialize;
-use serde::Serialize;
-use serde_json::Value;
-use tokio::sync::RwLock as ARwLock;
-use url::Url;
-use tracing::{error, info, warn};
-
-use crate::custom_error::ScratchError;
-use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext};
-use crate::known_models::KNOWN_MODELS;
-
-
-const CAPS_FILENAME: &str = "refact-caps";
-const CAPS_FILENAME_FALLBACK: &str = "coding_assistant_caps.json";
-
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct ModelRecord {
-    #[serde(default)]
-    pub n_ctx: usize,
-    #[serde(default)]
-    pub supports_scratchpads: HashMap<String, Value>,
-    #[serde(default)]
-    pub default_scratchpad: String,
-    #[serde(default)]
-    pub similar_models: Vec<String>,
-    #[serde(default)]
-    pub supports_tools: bool,
-    #[serde(default)]
-    pub supports_multimodality: bool,
-    #[serde(default)]
-    pub supports_clicks: bool,
-    #[serde(default)]
-    pub supports_agent: bool,
-    #[serde(default)]
-    pub supports_reasoning: Option<String>,
-    #[serde(default)]
-    pub supports_boost_reasoning: bool,
-    #[serde(default)]
-    pub default_temperature: Option<f32>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct EmbeddingModelRecord {
-    #[serde(default)]
-    pub n_ctx: usize,
-    #[serde(default)]
-    pub size: i32,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct ModelsOnly {
-    pub code_completion_models: IndexMap<String, ModelRecord>,
-    pub code_chat_models: IndexMap<String, ModelRecord>,
-    pub tokenizer_rewrite_path: HashMap<String, String>,
-}
-
-fn default_tokenizer_path_template() -> String {
-    String::from("https://huggingface.co/$MODEL/resolve/main/tokenizer.json")
-}
-
-fn default_telemetry_basic_dest() -> String {
-    String::from("https://www.smallcloud.ai/v1/telemetry-basic")
-}
-
-fn default_telemetry_basic_retrieve_my_own() -> String {
-    String::from("https://www.smallcloud.ai/v1/telemetry-retrieve-my-own-stats")
-}
-
-fn default_endpoint_style() -> String {
-    String::from("openai")
-}
-
-fn default_code_completion_n_ctx() -> usize {
-    2048
-}
-
-fn default_endpoint_embeddings_style() -> String {
-    String::from("openai")
-}
-
-fn default_support_metadata() -> bool { false }
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct CodeAssistantCaps {
-    pub cloud_name: String,
-
-    #[serde(default = "default_endpoint_style")]
-    pub endpoint_style: String,
-    #[serde(default)]
-    pub chat_endpoint_style: String,
-    #[serde(default = "default_endpoint_style")]
-    pub completion_endpoint_style: String,
-
-    #[serde(default)]
-    pub endpoint_template: String,
-    #[serde(default)]
-    pub completion_endpoint: String,
-    #[serde(default)]
-    pub chat_endpoint: String,
-
-    // default api key is in the command line
-    #[serde(default)]
-    pub completion_apikey: String,
-    #[serde(default)]
-    pub chat_apikey: String,
-    #[serde(default)]
-    pub embedding_apikey: String,
-
-    #[serde(default)]
-    pub endpoint_chat_passthrough: String,
-    #[serde(default = "default_tokenizer_path_template")]
-    pub tokenizer_path_template: String,
-    #[serde(default)]
-    pub tokenizer_rewrite_path: HashMap<String, String>,
-    #[serde(default = "default_telemetry_basic_dest")]
-    pub telemetry_basic_dest: String,
-    #[serde(default = "default_telemetry_basic_retrieve_my_own")]
-    pub telemetry_basic_retrieve_my_own: String,
-    #[serde(default)]
-    pub code_completion_models: IndexMap<String, ModelRecord>,
-    #[serde(default)]
-    #[serde(alias = "completion_model")]
-    pub code_completion_default_model: String,
-    #[serde(default)]
-    #[serde(alias = "multiline_completion_model")]
-    pub multiline_code_completion_default_model: String,
-    #[serde(default = "default_code_completion_n_ctx")]
-    #[serde(alias = "completion_n_ctx")]
-    pub code_completion_n_ctx: usize,
-    #[serde(default)]
-    pub code_chat_models: IndexMap<String, ModelRecord>,
-    #[serde(default)]
-    #[serde(alias = "chat_model")]
-    pub code_chat_default_model: String,
-    #[serde(default)]
-    pub models_dict_patch: HashMap<String, ModelRecord>,
-    #[serde(default)]
-    #[serde(alias = "default_embeddings_model")]
-    pub embedding_model: String,
-    #[serde(default)]
-    #[serde(alias = "embedding_endpoint")]
-    pub endpoint_embeddings_template: String,
-    #[serde(default = "default_endpoint_embeddings_style")]
-    #[serde(alias = "embedding_endpoint_style")]
-    pub endpoint_embeddings_style: String,
-    #[serde(default)]
-    #[serde(alias = "size_embeddings")]
-    pub embedding_size: i32,
-    #[serde(default)]
-    pub embedding_batch: usize,
-    #[serde(default)]
-    pub embedding_n_ctx: usize,
-    #[serde(default)]
-    pub running_models: Vec<String>,  // check there if a model is available or not, not in other places
-    #[serde(default)]
-    pub caps_version: i64,  // need to reload if it increases on server, that happens when server configuration changes
-    #[serde(default)]
-    pub code_chat_default_system_prompt: String,
-
-    #[serde(default)]
-    pub customization: String,  // on self-hosting server, allows to customize yaml_configs & friends for all engineers
-
-    #[serde(default = "default_support_metadata")]
-    pub support_metadata: bool,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsCompletion {
-    pub endpoint: String,
-    pub models: IndexMap<String, ModelRecord>,
-    pub default_model: String,
-    pub default_multiline_model: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsChat {
-    pub endpoint: String,
-    pub models: IndexMap<String, ModelRecord>,
-    pub default_model: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsEmbedding {
-    pub endpoint: String,
-    pub models: IndexMap<String, EmbeddingModelRecord>,
-    pub default_model: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsTelemetryEndpoints {
-    pub telemetry_basic_endpoint: String,
-    pub telemetry_corrected_snippets_endpoint: String,
-    pub telemetry_basic_retrieve_my_own_endpoint: String,
-}
-
-#[derive(Debug, Deserialize, Clone, Default)]
-pub struct CodeAssistantCapsV2 {
-    pub cloud_name: String,
-
-    pub completion: CodeAssistantCapsCompletion,
-    pub chat: CodeAssistantCapsChat,
-    pub embedding: CodeAssistantCapsEmbedding,
-
-    pub telemetry_endpoints: CodeAssistantCapsTelemetryEndpoints,
-    pub tokenizer_endpoints: HashMap<String, String>,
-
-    #[serde(default)]
-    pub customization: String,
-    #[serde(default)]
-    pub default_system_prompt: String,
-
-    pub caps_version: i64,
-}
-
-fn load_caps_from_buf(
-    buffer: &String,
-    caps_url: &String,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, String> {
-    let mut r1_mb_error_text = "".to_string();
-
-    let r1_mb: Option<CodeAssistantCaps> = match serde_json::from_str(&buffer) {
-        Ok(v) => v,
-        Err(e) => {
-            // incorrect json
-            if buffer.trim_start().starts_with(&['{', '[']) {
-                r1_mb_error_text = format!("{}", e);
-                None
-            } else {
-                match serde_yaml::from_str(&buffer) {
-                    Ok(v) => v,
-                    Err(e) => {
-                        r1_mb_error_text = format!("{}", e);
-                        None
-                    }
-                }
-            }
-        }
-    };
-    let mut r1 = r1_mb.ok_or(format!("failed to parse caps: {}", r1_mb_error_text))?;
-
-    let r0: ModelsOnly = serde_json::from_str(&KNOWN_MODELS).map_err(|e| {
-        let up_to_line = KNOWN_MODELS.lines().take(e.line()).collect::<Vec<&str>>().join("\n");
-        error!("{}\nfailed to parse KNOWN_MODELS: {}", up_to_line, e);
-        format!("failed to parse KNOWN_MODELS: {}", e)
-    })?;
-
-    if !r1.code_chat_default_model.is_empty() && !r1.running_models.contains(&r1.code_chat_default_model) {
-        r1.running_models.push(r1.code_chat_default_model.clone());
-    }
-    if !r1.code_completion_default_model.is_empty() && !r1.running_models.contains(&r1.code_completion_default_model) {
-        r1.running_models.push(r1.code_completion_default_model.clone());
-    }
-    if !r1.multiline_code_completion_default_model.is_empty() && !r1.running_models.contains(&r1.multiline_code_completion_default_model) {
-        r1.running_models.push(r1.multiline_code_completion_default_model.clone());
-    }
-    if !r1.embedding_model.is_empty() && !r1.running_models.contains(&r1.embedding_model) {
-        r1.running_models.push(r1.embedding_model.clone());
-    }
-
-    _inherit_r1_from_r0(&mut r1, &r0);
-    apply_models_dict_patch(&mut r1);
-    r1.endpoint_template = relative_to_full_url(&caps_url, &r1.endpoint_template)?;
-    r1.endpoint_chat_passthrough = relative_to_full_url(&caps_url, &r1.endpoint_chat_passthrough)?;
-    if r1.endpoint_chat_passthrough.is_empty() {
-        r1.endpoint_chat_passthrough = relative_to_full_url(&caps_url, &r1.chat_endpoint)?;
-    }
-    r1.telemetry_basic_dest = relative_to_full_url(&caps_url, &r1.telemetry_basic_dest)?;
-    r1.telemetry_basic_retrieve_my_own = relative_to_full_url(&caps_url, &r1.telemetry_basic_retrieve_my_own)?;
-    r1.endpoint_embeddings_template = relative_to_full_url(&caps_url, &r1.endpoint_embeddings_template)?;
-    r1.tokenizer_path_template = relative_to_full_url(&caps_url, &r1.tokenizer_path_template)?;
-    if r1.embedding_n_ctx == 0 {
-        r1.embedding_n_ctx = 512;
-    }
-
-    // info!("caps {} completion models", r1.code_completion_models.len());
-    // info!("caps default completion model: \"{}\"", r1.code_completion_default_model);
-    // info!("caps {} chat models", r1.code_chat_models.len());
-    // info!("caps default chat model: \"{}\"", r1.code_chat_default_model);
-    // info!("running models: {:?}", r1.running_models);
-    // info!("code_chat_models models: {:?}", r1.code_chat_models);
-    // info!("code completion models: {:?}", r1.code_completion_models);
-    Ok(Arc::new(StdRwLock::new(r1)))
-}
-
-fn load_caps_from_buf_v2(
-    buffer: &String,
-    caps_url: &String,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, String> {
-    // Try to parse as V2 format
-    let caps_v2: CodeAssistantCapsV2 = match serde_json::from_str(buffer) {
-        Ok(v) => v,
-        Err(_) => return Err("failed to load in v2 format".to_string()),
-    };
-
-    // Convert V2 to V1 format
-    let mut caps = CodeAssistantCaps {
-        cloud_name: caps_v2.cloud_name,
-        endpoint_style: "openai".to_string(),
-        chat_endpoint_style: "openai".to_string(),
-        completion_endpoint_style: "openai".to_string(),
-        endpoint_embeddings_style: "openai".to_string(),
-
-        // Completion related fields
-        completion_endpoint: relative_to_full_url(&caps_url, &caps_v2.completion.endpoint)?,
-        code_completion_models: caps_v2.completion.models.clone(),
-        code_completion_default_model: caps_v2.completion.default_model.clone(),
-        multiline_code_completion_default_model: caps_v2.completion.default_multiline_model.clone(),
-
-        // Chat related fields
-        chat_endpoint: relative_to_full_url(&caps_url, &caps_v2.completion.endpoint)?,  // for completion-based chat
-        endpoint_chat_passthrough: relative_to_full_url(&caps_url, &caps_v2.chat.endpoint)?,
-        code_chat_models: caps_v2.chat.models.clone(),
-        code_chat_default_model: caps_v2.chat.default_model.clone(),
-
-        // Embeddings related fields
-        endpoint_embeddings_template: relative_to_full_url(&caps_url, &caps_v2.embedding.endpoint)?,
-        embedding_model: caps_v2.embedding.default_model.clone(),
-        embedding_n_ctx: caps_v2.embedding.models.get(&caps_v2.embedding.default_model).cloned().unwrap_or_default().n_ctx,
-        embedding_size: caps_v2.embedding.models.get(&caps_v2.embedding.default_model).cloned().unwrap_or_default().size,
-
-        // Telemetry endpoints
-        telemetry_basic_dest: relative_to_full_url(&caps_url, &caps_v2.telemetry_endpoints.telemetry_basic_endpoint)?,
-        telemetry_basic_retrieve_my_own: relative_to_full_url(&caps_url, &caps_v2.telemetry_endpoints.telemetry_basic_retrieve_my_own_endpoint)?,
-
-        tokenizer_path_template: "".to_string(),
-        tokenizer_rewrite_path: {
-            let mut rewritten_paths = HashMap::new();
-            for (key, endpoint) in caps_v2.tokenizer_endpoints {
-                let full_url = relative_to_full_url(&caps_url, &endpoint)?;
-                rewritten_paths.insert(key, full_url);
-            }
-            rewritten_paths
-        },
-
-        // Version
-        caps_version: caps_v2.caps_version,
-
-        // Collect all models from completion and chat sections
-        running_models: {
-            let mut models = std::collections::HashSet::new();
-            models.extend(caps_v2.completion.models.keys().cloned());
-            models.extend(caps_v2.chat.models.keys().cloned());
-            // models.extend(caps_v2.embedding.models.keys().cloned());
-            models.into_iter().collect()
-        },
-
-        customization: caps_v2.customization.clone(),
-        code_chat_default_system_prompt: caps_v2.default_system_prompt.clone(),
-
-        ..Default::default()
-    };
-
-    // Convert relative URLs to absolute URLs
-    caps.endpoint_embeddings_template = relative_to_full_url(&caps_url, &caps.endpoint_embeddings_template)?;
-    caps.chat_endpoint = relative_to_full_url(&caps_url, &caps.chat_endpoint)?;
-    caps.telemetry_basic_dest = relative_to_full_url(&caps_url, &caps.telemetry_basic_dest)?;
-    caps.telemetry_basic_retrieve_my_own = relative_to_full_url(&caps_url, &caps.telemetry_basic_retrieve_my_own)?;
-
-    // Set default embedding context size if not set
-    if caps.embedding_n_ctx == 0 {
-        caps.embedding_n_ctx = 512;
-    }
-
-    Ok(Arc::new(StdRwLock::new(caps)))
-}
-
-macro_rules! get_api_key_macro {
-    ($gcx:expr, $caps:expr, $field:ident) => {{
-        let cx_locked = $gcx.read().await;
-        let custom_apikey = $caps.read().unwrap().$field.clone();
-        if custom_apikey.is_empty() {
-            cx_locked.cmdline.api_key.clone()
-        } else if custom_apikey.starts_with("$") {
-            let env_var_name = &custom_apikey[1..];
-            match std::env::var(env_var_name) {
-                Ok(env_value) => env_value,
-                Err(e) => {
-                    error!("tried to read API key from env var {}, but failed: {}\nTry editing ~/.config/refact/bring-your-own-key.yaml", env_var_name, e);
-                    cx_locked.cmdline.api_key.clone()
-                }
-            }
-        } else {
-            custom_apikey
-        }
-    }};
-}
-
-pub async fn get_api_key(
-    gcx: Arc<ARwLock<GlobalContext>>,
-    use_this_fall_back_to_default_if_empty: String,
-) -> String {
-    let gcx_locked = gcx.write().await;
-    if use_this_fall_back_to_default_if_empty.is_empty() {
-        gcx_locked.cmdline.api_key.clone()
-    } else if use_this_fall_back_to_default_if_empty.starts_with("$") {
-        let env_var_name = &use_this_fall_back_to_default_if_empty[1..];
-        match std::env::var(env_var_name) {
-            Ok(env_value) => env_value,
-            Err(e) => {
-                error!("tried to read API key from env var {}, but failed: {}\nTry editing ~/.config/refact/bring-your-own-key.yaml", env_var_name, e);
-                gcx_locked.cmdline.api_key.clone()
-            }
-        }
-    } else {
-        use_this_fall_back_to_default_if_empty
-    }
-}
-
-#[allow(dead_code)]
-async fn get_custom_chat_api_key(gcx: Arc<ARwLock<GlobalContext>>) -> Result<String, ScratchError> {
-    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?;
-    Ok(get_api_key_macro!(gcx, caps, chat_apikey))
-}
-
-#[cfg(feature="vecdb")]
-pub async fn get_custom_embedding_api_key(gcx: Arc<ARwLock<GlobalContext>>) -> Result<String, ScratchError> {
-    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?;
-    Ok(get_api_key_macro!(gcx, caps, embedding_apikey))
-}
-
-#[allow(dead_code)]
-async fn get_custom_completion_api_key(gcx: Arc<ARwLock<GlobalContext>>) -> Result<String, ScratchError> {
-    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?;
-    Ok(get_api_key_macro!(gcx, caps, completion_apikey))
-}
-
-
-async fn load_caps_buf_from_file(
-    cmdline: crate::global_context::CommandLine,
-    gcx: Arc<ARwLock<GlobalContext>>,
-) -> Result<(String, String), String> {
-    let mut caps_url = cmdline.address_url.clone();
-    if caps_url.is_empty() {
-        let config_dir = {
-            let gcx_locked = gcx.read().await;
-            gcx_locked.config_dir.clone()
-        };
-        let caps_path = PathBuf::from(config_dir).join("bring-your-own-key.yaml");
-        caps_url = caps_path.to_string_lossy().into_owned();
-        // info!("will use {} as the caps file", caps_url);
-    }
-    let mut buffer = String::new();
-    let mut file = File::open(caps_url.clone()).map_err(|_| format!("failed to open file '{}'", caps_url))?;
-    file.read_to_string(&mut buffer).map_err(|_| format!("failed to read file '{}'", caps_url))?;
-    Ok((buffer, caps_url))
-}
-
-async fn load_caps_buf_from_url(
-    cmdline: crate::global_context::CommandLine,
-    gcx: Arc<ARwLock<GlobalContext>>,
-) -> Result<(String, String), String> {
-    let mut buffer = String::new();
-    let mut caps_urls: Vec<String> = Vec::new();
-    if cmdline.address_url.to_lowercase() == "refact" {
-        caps_urls.push("https://inference.smallcloud.ai/coding_assistant_caps.json".to_string());
-    } else {
-        let base_url = Url::parse(&cmdline.address_url.clone()).map_err(|_| "failed to parse address url (1)".to_string())?;
-        let joined_url = base_url.join(&CAPS_FILENAME).map_err(|_| "failed to parse address url (2)".to_string())?;
-        let joined_url_fallback = base_url.join(&CAPS_FILENAME_FALLBACK).map_err(|_| "failed to parse address url (2)".to_string())?;
-        caps_urls.push(joined_url.to_string());
-        caps_urls.push(joined_url_fallback.to_string());
-    }
-
-    let http_client = gcx.read().await.http_client.clone();
-    let api_key = cmdline.api_key.clone();
-    let mut headers = reqwest::header::HeaderMap::new();
-    if !api_key.is_empty() {
-        headers.insert(reqwest::header::AUTHORIZATION, reqwest::header::HeaderValue::from_str(format!("Bearer {}", api_key).as_str()).unwrap());
-        headers.insert(reqwest::header::USER_AGENT, reqwest::header::HeaderValue::from_str(format!("refact-lsp {}", crate::version::build_info::PKG_VERSION).as_str()).unwrap());
-    }
-
-    let mut status: u16 = 0;
-    for url in caps_urls.iter() {
-        info!("fetching caps from {}", url);
-        let response = http_client.get(url).headers(headers.clone()).send().await.map_err(|e| format!("{}", e))?;
-        status = response.status().as_u16();
-        buffer = match response.text().await {
-            Ok(v) => v,
-            Err(_) => continue
-        };
-
-        if status == 200 {
-            break;
-        }
-
-        warn!("status={}; server responded with:\n{}", status, buffer);
-    }
-    if status != 200 {
-        let response_json: serde_json::Result<Value> = serde_json::from_str(&buffer);
-        return if let Ok(response_json) = response_json {
-            if let Some(detail) = response_json.get("detail") {
-                Err(detail.as_str().unwrap().to_string())
-            } else {
-                Err(format!("cannot fetch caps, status={}", status))
-            }
-        } else {
-            Err(format!("cannot fetch caps, status={}", status))
-        };
-    }
-
-    let caps_url: String = match caps_urls.get(0) {
-        Some(u) => u.clone(),
-        None => return Err("caps_url is none".to_string())
-    };
-
-    Ok((buffer, caps_url))
-}
-
-pub async fn load_caps(
-    cmdline: crate::global_context::CommandLine,
-    gcx: Arc<ARwLock<GlobalContext>>,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, String> {
-    let mut caps_url = cmdline.address_url.clone();
-    let buf: String;
-    if caps_url.to_lowercase() == "refact" || caps_url.starts_with("http") {
-        (buf, caps_url) = load_caps_buf_from_url(cmdline, gcx).await?
-    } else {
-        (buf, caps_url) = load_caps_buf_from_file(cmdline, gcx).await?
-    }
-    match load_caps_from_buf_v2(&buf, &caps_url) {
-        Ok(caps) => Ok(caps),
-        Err(e) => {
-            info!("Cannot load v2 caps: `{}`, try old format", e);
-            load_caps_from_buf(&buf, &caps_url)
-        }
-    }
-}
-
-pub fn strip_model_from_finetune(model: &String) -> String {
-    model.split(":").next().unwrap().to_string()
-}
-
-fn relative_to_full_url(
-    caps_url: &String,
-    maybe_relative_url: &str,
-) -> Result<String, String> {
-    if maybe_relative_url.starts_with("http") {
-        Ok(maybe_relative_url.to_string())
-    } else if maybe_relative_url.is_empty() {
-        Ok("".to_string())
-    } else {
-        let base_url = Url::parse(caps_url.as_str()).map_err(|_| "failed to parse address url (3)".to_string())?;
-        let joined_url = base_url.join(maybe_relative_url).map_err(|_| "failed to join URL \"{}\" and possibly relative \"{}\"".to_string())?;
-        Ok(joined_url.to_string())
-    }
-}
-
-fn apply_models_dict_patch(caps: &mut CodeAssistantCaps) {
-    fn apply_model_record_patch(rec: &mut ModelRecord, rec_patched: &ModelRecord) {
-        if rec_patched.n_ctx != 0 {
-            rec.n_ctx = rec_patched.n_ctx;
-        }
-        if rec_patched.supports_tools {
-            rec.supports_tools = rec_patched.supports_tools;
-        }
-        if rec_patched.supports_multimodality {
-            rec.supports_multimodality = rec_patched.supports_multimodality;
-        }
-        if rec_patched.supports_tools {
-            rec.supports_tools = rec_patched.supports_tools;
-        }
-    }
-
-    for (model, rec_patched) in caps.models_dict_patch.iter() {
-        if let Some(rec) = caps.code_completion_models.get_mut(model) {
-            apply_model_record_patch(rec, rec_patched);
-        }
-        if let Some(rec) = caps.code_chat_models.get_mut(model) {
-            apply_model_record_patch(rec, rec_patched);
-        }
-    }
-}
-
-fn _inherit_r1_from_r0(
-    r1: &mut CodeAssistantCaps,
-    r0: &ModelsOnly,
-) {
-    // XXX: only patches running models, patch all?
-    for k in r1.running_models.iter() {
-        let k_stripped = strip_model_from_finetune(k);
-
-        for (rec_name, rec) in r0.code_completion_models.iter() {
-            if rec_name == &k_stripped || rec.similar_models.contains(&k_stripped) {
-                r1.code_completion_models.insert(k.to_string(), rec.clone());
-            }
-        }
-
-        for (rec_name, rec) in r0.code_chat_models.iter() {
-            if rec_name == &k_stripped || rec.similar_models.contains(&k_stripped) {
-                r1.code_chat_models.insert(k.to_string(), rec.clone());
-            }
-        }
-    }
-
-    for k in r1.running_models.iter() {
-        if !r1.code_completion_models.contains_key(k) && !r1.code_chat_models.contains_key(k) && *k != r1.embedding_model {
-            warn!("indicated as running, unknown model {:?}, maybe update this rust binary", k);
-        }
-    }
-
-    for k in r0.tokenizer_rewrite_path.keys() {
-        if !r1.tokenizer_rewrite_path.contains_key(k) {
-            r1.tokenizer_rewrite_path.insert(k.to_string(), r0.tokenizer_rewrite_path[k].clone());
-        }
-    }
-}
-
-pub fn which_model_to_use<'a>(
-    models: &'a IndexMap<String, ModelRecord>,
-    user_wants_model: &str,
-    default_model: &str,
-) -> Result<(String, &'a ModelRecord), String> {
-    let mut take_this_one = default_model;
-    if user_wants_model != "" {
-        take_this_one = user_wants_model;
-    }
-    let no_finetune = strip_model_from_finetune(&take_this_one.to_string());
-    if let Some(model_rec) = models.get(&take_this_one.to_string()) {
-        Ok((take_this_one.to_string(), model_rec))
-    } else if let Some(model_rec) = models.get(&no_finetune) {
-        Ok((take_this_one.to_string(), model_rec))
-    } else {
-        Err(format!(
-            "Model '{}' not found. Server has these models: {:?}",
-            take_this_one,
-            models.keys()
-        ))
-    }
-}
-
-pub fn which_scratchpad_to_use<'a>(
-    scratchpads: &'a HashMap<String, serde_json::Value>,
-    user_wants_scratchpad: &str,
-    default_scratchpad: &str,
-) -> Result<(String, &'a serde_json::Value), String> {
-    let mut take_this_one = default_scratchpad;
-    if user_wants_scratchpad != "" {
-        take_this_one = user_wants_scratchpad;
-    }
-    if default_scratchpad == "" {
-        if scratchpads.len() == 1 {
-            let key = scratchpads.keys().next().unwrap();
-            return Ok((key.clone(), &scratchpads[key]));
-        } else {
-            return Err(format!(
-                "There is no default scratchpad defined, requested scratchpad is empty. The model supports these scratchpads: {:?}",
-                scratchpads.keys()
-            ));
-        }
-    }
-    if let Some(scratchpad_patch) = scratchpads.get(take_this_one) {
-        return Ok((take_this_one.to_string(), scratchpad_patch));
-    } else {
-        return Err(format!(
-            "Scratchpad '{}' not found. The model supports these scratchpads: {:?}",
-            take_this_one,
-            scratchpads.keys()
-        ));
-    }
-}
-
-pub async fn get_model_record(
-    gcx: Arc<ARwLock<GlobalContext>>,
-    model: &str,
-) -> Result<ModelRecord, String> {
-    let caps = crate::global_context::try_load_caps_quickly_if_not_present(
-        gcx.clone(), 0,
-    ).await.map_err(|e| {
-        warn!("no caps: {:?}", e);
-        format!("failed to load caps: {}", e)
-    })?;
-
-    let caps_lock = caps.read().unwrap();
-    match caps_lock.code_chat_models.get(model) {
-        Some(res) => Ok(res.clone()),
-        None => Err(format!("no model record for model `{}`", model))
-    }
-}
-
-
-pub const BRING_YOUR_OWN_KEY_SAMPLE: &str = r#"
-cloud_name: My own mix of clouds!
-
-chat_endpoint: "https://api.openai.com/v1/chat/completions"
-chat_apikey: "$OPENAI_API_KEY"    # Will work if you have it in global environment variables, but better use the real sk-... key
-chat_model: gpt-4o-mini
-
-embedding_endpoint: "https://api.openai.com/v1/embeddings"
-embedding_apikey: "$OPENAI_API_KEY"
-embedding_model: text-embedding-3-small
-embedding_size: 1536
-
-# completion_endpoint: "https://api-inference.huggingface.co/models/$MODEL"
-# completion_endpoint_style: "hf"
-# completion_apikey: "hf_..."    # or use $HF_TOKEN if you have it in global environment variables
-# completion_model: bigcode/starcoder2-3b
-
-running_models:    # all models mentioned in *_model are automatically running, but you can add more
-  - gpt-4o-mini
-  - gpt-4o
-
-# More examples https://github.com/smallcloudai/refact-lsp/tree/dev/bring_your_own_key
-
-# Refact sends basic telemetry (counters and errors), you can send it to a different address (a Refact self-hosting server is especially useful) or set to an empty string for no telemetry.
-# telemetry_basic_dest: # default: https://www.smallcloud.ai/v1/telemetry-basic -# telemetry_basic_retrieve_my_own: # default: https://www.smallcloud.ai/v1/telemetry-retrieve-my-own-stats -"#; \ No newline at end of file diff --git a/refact-agent/engine/src/caps/caps.rs b/refact-agent/engine/src/caps/caps.rs new file mode 100644 index 000000000..208178079 --- /dev/null +++ b/refact-agent/engine/src/caps/caps.rs @@ -0,0 +1,432 @@ +use std::sync::Arc; + +use indexmap::IndexMap; +use serde::Deserialize; +use serde::Serialize; +use tokio::sync::RwLock as ARwLock; +use url::Url; +use tracing::{info, warn}; + +use crate::custom_error::MapErrToString; +use crate::global_context::CommandLine; +use crate::global_context::GlobalContext; +use crate::caps::providers::{add_models_to_caps, read_providers_d, resolve_provider_api_key, + post_process_provider, CapsProvider}; +use crate::caps::self_hosted::SelfHostedCaps; + +pub const CAPS_FILENAME: &str = "refact-caps"; +pub const CAPS_FILENAME_FALLBACK: &str = "coding_assistant_caps.json"; + +#[derive(Debug, Serialize, Clone, Deserialize, Default, PartialEq)] +pub struct BaseModelRecord { + #[serde(default)] + pub n_ctx: usize, + + /// Actual model name, e.g. "gpt-4o" + #[serde(default)] + pub name: String, + /// provider/model_name, e.g. "openai/gpt-4o" + #[serde(skip_deserializing)] + pub id: String, + + #[serde(default, skip_serializing)] + pub endpoint: String, + #[serde(default, skip_serializing)] + pub endpoint_style: String, + #[serde(default, skip_serializing)] + pub api_key: String, + #[serde(default, skip_serializing)] + pub tokenizer_api_key: String, + + #[serde(default, skip_serializing)] + pub support_metadata: bool, + #[serde(default, skip_serializing)] + pub similar_models: Vec, + #[serde(default)] + pub tokenizer: String, + + #[serde(default = "default_true")] + pub enabled: bool, + // Fields used for Config/UI management + #[serde(skip_deserializing)] + pub removable: bool, + #[serde(skip_deserializing)] + pub user_configured: bool, +} + +fn default_true() -> bool { true } + +pub trait HasBaseModelRecord { + fn base(&self) -> &BaseModelRecord; + fn base_mut(&mut self) -> &mut BaseModelRecord; +} + +#[derive(Debug, Serialize, Clone, Deserialize, Default)] +pub struct ChatModelRecord { + #[serde(flatten)] + pub base: BaseModelRecord, + + #[serde(default = "default_chat_scratchpad", skip_serializing)] + pub scratchpad: String, + #[serde(default, skip_serializing)] + pub scratchpad_patch: serde_json::Value, + + #[serde(default)] + pub supports_tools: bool, + #[serde(default)] + pub supports_multimodality: bool, + #[serde(default)] + pub supports_clicks: bool, + #[serde(default)] + pub supports_agent: bool, + #[serde(default)] + pub supports_reasoning: Option, + #[serde(default)] + pub supports_boost_reasoning: bool, + #[serde(default)] + pub default_temperature: Option, +} + +pub fn default_chat_scratchpad() -> String { "PASSTHROUGH".to_string() } + +impl HasBaseModelRecord for ChatModelRecord { + fn base(&self) -> &BaseModelRecord { &self.base } + fn base_mut(&mut self) -> &mut BaseModelRecord { &mut self.base } +} + +#[derive(Debug, Serialize, Clone, Deserialize, Default)] +pub struct CompletionModelRecord { + #[serde(flatten)] + pub base: BaseModelRecord, + + #[serde(default = "default_completion_scratchpad")] + pub scratchpad: String, + #[serde(default = "default_completion_scratchpad_patch")] + pub scratchpad_patch: serde_json::Value, + + pub model_family: Option, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, 
Eq)] +pub enum CompletionModelFamily { + #[serde(rename = "qwen2.5-coder-base")] + Qwen2_5CoderBase, + #[serde(rename = "starcoder")] + Starcoder, + #[serde(rename = "deepseek-coder")] + DeepseekCoder, +} + +impl CompletionModelFamily { + pub fn to_string(self) -> String { + serde_json::to_value(self).ok() + .and_then(|v| v.as_str().map(|s| s.to_string())).unwrap_or_default() + } + + pub fn all_variants() -> Vec { + vec![ + CompletionModelFamily::Qwen2_5CoderBase, + CompletionModelFamily::Starcoder, + CompletionModelFamily::DeepseekCoder, + ] + } +} + +pub fn default_completion_scratchpad() -> String { "REPLACE_PASSTHROUGH".to_string() } + +pub fn default_completion_scratchpad_patch() -> serde_json::Value { serde_json::json!({ + "context_format": "chat", + "rag_ratio": 0.5 +}) } + +impl HasBaseModelRecord for CompletionModelRecord { + fn base(&self) -> &BaseModelRecord { &self.base } + fn base_mut(&mut self) -> &mut BaseModelRecord { &mut self.base } +} + +#[derive(Debug, Serialize, Clone, Default, PartialEq)] +pub struct EmbeddingModelRecord { + #[serde(flatten)] + pub base: BaseModelRecord, + + pub embedding_size: i32, + pub rejection_threshold: f32, + pub embedding_batch: usize, +} + +pub fn default_rejection_threshold() -> f32 { 0.63 } + +pub fn default_embedding_batch() -> usize { 64 } + +impl HasBaseModelRecord for EmbeddingModelRecord { + fn base(&self) -> &BaseModelRecord { &self.base } + fn base_mut(&mut self) -> &mut BaseModelRecord { &mut self.base } +} + +impl EmbeddingModelRecord { + pub fn is_configured(&self) -> bool { + !self.base.name.is_empty() && (self.embedding_size > 0 || self.embedding_batch > 0 || self.base.n_ctx > 0) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct CodeAssistantCaps { + #[serde(deserialize_with = "normalize_string")] + pub cloud_name: String, // "refact" or "refact_self_hosted" + + #[serde(default = "default_telemetry_basic_dest")] + pub telemetry_basic_dest: String, + #[serde(default = "default_telemetry_retrieve_my_own")] + pub telemetry_basic_retrieve_my_own: String, + + #[serde(skip_deserializing)] + pub completion_models: IndexMap>, // keys are "provider/model" + #[serde(skip_deserializing)] + pub chat_models: IndexMap>, + #[serde(skip_deserializing)] + pub embedding_model: EmbeddingModelRecord, + + #[serde(flatten, skip_deserializing)] + pub defaults: DefaultModels, + + #[serde(default)] + pub caps_version: i64, // need to reload if it increases on server, that happens when server configuration changes + + #[serde(default)] + pub customization: String, // on self-hosting server, allows to customize yaml_configs & friends for all engineers + + #[serde(default = "default_hf_tokenizer_template")] + pub hf_tokenizer_template: String, // template for HuggingFace tokenizer URLs +} + +fn default_telemetry_retrieve_my_own() -> String { + "https://www.smallcloud.ai/v1/telemetry-retrieve-my-own-stats".to_string() +} + +pub fn default_hf_tokenizer_template() -> String { + "https://huggingface.co/$HF_MODEL/resolve/main/tokenizer.json".to_string() +} + +fn default_telemetry_basic_dest() -> String { + "https://www.smallcloud.ai/v1/telemetry-basic".to_string() +} + +pub fn normalize_string<'de, D: serde::Deserializer<'de>>(deserializer: D) -> Result { + let s: String = String::deserialize(deserializer)?; + Ok(s.chars().map(|c| if c.is_alphanumeric() { c.to_ascii_lowercase() } else { '_' }).collect()) +} + +#[derive(Debug, Serialize, Deserialize, Clone, Default)] +pub struct DefaultModels { + #[serde(default, alias = 
"code_completion_default_model", alias = "completion_model")] + pub completion_default_model: String, + #[serde(default, alias = "code_chat_default_model", alias = "chat_model")] + pub chat_default_model: String, + #[serde(default)] + pub chat_thinking_model: String, + #[serde(default)] + pub chat_light_model: String, +} + +impl DefaultModels { + pub fn apply_override(&mut self, other: &DefaultModels, provider_name: Option<&str>) { + if !other.completion_default_model.is_empty() { + self.completion_default_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.completion_default_model), + None => other.completion_default_model.clone(), + }; + } + if !other.chat_default_model.is_empty() { + self.chat_default_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.chat_default_model), + None => other.chat_default_model.clone(), + }; + } + if !other.chat_thinking_model.is_empty() { + self.chat_thinking_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.chat_thinking_model), + None => other.chat_thinking_model.clone(), + }; + } + if !other.chat_light_model.is_empty() { + self.chat_light_model = match provider_name { + Some(provider) => format!("{}/{}", provider, other.chat_light_model), + None => other.chat_light_model.clone(), + }; + } + } +} + +pub async fn load_caps_value_from_url( + cmdline: CommandLine, + gcx: Arc>, +) -> Result<(serde_json::Value, String), String> { + let caps_urls = if cmdline.address_url.to_lowercase() == "refact" { + vec!["https://inference.smallcloud.ai/coding_assistant_caps.json".to_string()] + } else { + let base_url = Url::parse(&cmdline.address_url) + .map_err(|_| "failed to parse address url".to_string())?; + + vec![ + base_url.join(&CAPS_FILENAME).map_err(|_| "failed to join caps URL".to_string())?.to_string(), + base_url.join(&CAPS_FILENAME_FALLBACK).map_err(|_| "failed to join fallback caps URL".to_string())?.to_string(), + ] + }; + + let http_client = gcx.read().await.http_client.clone(); + let mut headers = reqwest::header::HeaderMap::new(); + + if !cmdline.api_key.is_empty() { + headers.insert(reqwest::header::AUTHORIZATION, reqwest::header::HeaderValue::from_str(&format!("Bearer {}", cmdline.api_key)).unwrap()); + headers.insert(reqwest::header::USER_AGENT, reqwest::header::HeaderValue::from_str(&format!("refact-lsp {}", crate::version::build_info::PKG_VERSION)).unwrap()); + } + + let mut last_status = 0; + let mut last_response_json: Option = None; + + for url in &caps_urls { + info!("fetching caps from {}", url); + let response = http_client.get(url) + .headers(headers.clone()) + .send() + .await + .map_err(|e| e.to_string())?; + + last_status = response.status().as_u16(); + + if let Ok(json_value) = response.json::().await { + if last_status == 200 { + return Ok((json_value, url.clone())); + } + last_response_json = Some(json_value.clone()); + warn!("status={}; server responded with:\n{}", last_status, json_value); + } + } + + if let Some(json_value) = last_response_json { + if let Some(detail) = json_value.get("detail").and_then(|d| d.as_str()) { + return Err(detail.to_string()); + } + } + + Err(format!("cannot fetch caps, status={}", last_status)) +} + +pub async fn load_caps( + cmdline: crate::global_context::CommandLine, + gcx: Arc>, +) -> Result, String> { + let (config_dir, cmdline_api_key) = { + let gcx_locked = gcx.read().await; + (gcx_locked.config_dir.clone(), gcx_locked.cmdline.api_key.clone()) + }; + + let (caps_value, caps_url) = 
+pub async fn load_caps_value_from_url(
+    cmdline: CommandLine,
+    gcx: Arc<ARwLock<GlobalContext>>,
+) -> Result<(serde_json::Value, String), String> {
+    let caps_urls = if cmdline.address_url.to_lowercase() == "refact" {
+        vec!["https://inference.smallcloud.ai/coding_assistant_caps.json".to_string()]
+    } else {
+        let base_url = Url::parse(&cmdline.address_url)
+            .map_err(|_| "failed to parse address url".to_string())?;
+
+        vec![
+            base_url.join(&CAPS_FILENAME).map_err(|_| "failed to join caps URL".to_string())?.to_string(),
+            base_url.join(&CAPS_FILENAME_FALLBACK).map_err(|_| "failed to join fallback caps URL".to_string())?.to_string(),
+        ]
+    };
+
+    let http_client = gcx.read().await.http_client.clone();
+    let mut headers = reqwest::header::HeaderMap::new();
+
+    if !cmdline.api_key.is_empty() {
+        headers.insert(reqwest::header::AUTHORIZATION, reqwest::header::HeaderValue::from_str(&format!("Bearer {}", cmdline.api_key)).unwrap());
+        headers.insert(reqwest::header::USER_AGENT, reqwest::header::HeaderValue::from_str(&format!("refact-lsp {}", crate::version::build_info::PKG_VERSION)).unwrap());
+    }
+
+    let mut last_status = 0;
+    let mut last_response_json: Option<serde_json::Value> = None;
+
+    for url in &caps_urls {
+        info!("fetching caps from {}", url);
+        let response = http_client.get(url)
+            .headers(headers.clone())
+            .send()
+            .await
+            .map_err(|e| e.to_string())?;
+
+        last_status = response.status().as_u16();
+
+        if let Ok(json_value) = response.json::<serde_json::Value>().await {
+            if last_status == 200 {
+                return Ok((json_value, url.clone()));
+            }
+            last_response_json = Some(json_value.clone());
+            warn!("status={}; server responded with:\n{}", last_status, json_value);
+        }
+    }
+
+    if let Some(json_value) = last_response_json {
+        if let Some(detail) = json_value.get("detail").and_then(|d| d.as_str()) {
+            return Err(detail.to_string());
+        }
+    }
+
+    Err(format!("cannot fetch caps, status={}", last_status))
+}
+
+pub async fn load_caps(
+    cmdline: crate::global_context::CommandLine,
+    gcx: Arc<ARwLock<GlobalContext>>,
+) -> Result<Arc<CodeAssistantCaps>, String> {
+    let (config_dir, cmdline_api_key) = {
+        let gcx_locked = gcx.read().await;
+        (gcx_locked.config_dir.clone(), gcx_locked.cmdline.api_key.clone())
+    };
+
+    let (caps_value, caps_url) = load_caps_value_from_url(cmdline, gcx).await?;
+
+    let (mut caps, server_providers) = match serde_json::from_value::<SelfHostedCaps>(caps_value.clone()) {
+        Ok(self_hosted_caps) => (self_hosted_caps.into_caps(&caps_url, &cmdline_api_key)?, Vec::new()),
+        Err(_) => {
+            let caps = serde_json::from_value::<CodeAssistantCaps>(caps_value.clone())
+                .map_err_with_prefix("Failed to parse caps:")?;
+            let mut server_provider = serde_json::from_value::<CapsProvider>(caps_value)
+                .map_err_with_prefix("Failed to parse caps provider:")?;
+            resolve_relative_urls(&mut server_provider, &caps_url)?;
+            (caps, vec![server_provider])
+        }
+    };
+
+    caps.telemetry_basic_dest = relative_to_full_url(&caps_url, &caps.telemetry_basic_dest)?;
+    caps.telemetry_basic_retrieve_my_own = relative_to_full_url(&caps_url, &caps.telemetry_basic_retrieve_my_own)?;
+
+    let (mut providers, error_log) = read_providers_d(server_providers, &config_dir).await;
+    providers.retain(|p| p.enabled);
+    for e in error_log {
+        tracing::error!("{e}");
+    }
+    for provider in &mut providers {
+        post_process_provider(provider, false);
+        provider.api_key = resolve_provider_api_key(&provider, &cmdline_api_key);
+    }
+    add_models_to_caps(&mut caps, providers);
+
+    Ok(Arc::new(caps))
+}
+
+pub fn resolve_relative_urls(provider: &mut CapsProvider, caps_url: &str) -> Result<(), String> {
+    provider.chat_endpoint = relative_to_full_url(caps_url, &provider.chat_endpoint)?;
+    provider.completion_endpoint = relative_to_full_url(caps_url, &provider.completion_endpoint)?;
+    provider.embedding_endpoint = relative_to_full_url(caps_url, &provider.embedding_endpoint)?;
+    Ok(())
+}
+
+pub fn strip_model_from_finetune(model: &str) -> String {
+    model.split(":").next().unwrap().to_string()
+}
+
+pub fn relative_to_full_url(
+    caps_url: &str,
+    maybe_relative_url: &str,
+) -> Result<String, String> {
+    if maybe_relative_url.starts_with("http") {
+        Ok(maybe_relative_url.to_string())
+    } else if maybe_relative_url.is_empty() {
+        Ok("".to_string())
+    } else {
+        let base_url = Url::parse(caps_url)
+            .map_err(|_| format!("failed to parse caps url: {}", caps_url))?;
+        let joined_url = base_url.join(maybe_relative_url)
+            .map_err(|_| format!("failed to join url: {}", maybe_relative_url))?;
+        Ok(joined_url.to_string())
+    }
+}
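+// Illustrative inputs and outputs for relative_to_full_url (values assumed):
+//     relative_to_full_url("https://host/refact-caps", "v1/chat")     // Ok("https://host/v1/chat")
+//     relative_to_full_url("https://host/refact-caps", "https://x/y") // Ok("https://x/y"), absolute urls pass through
+//     relative_to_full_url("https://host/refact-caps", "")            // Ok(""), empty stays empty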
+pub fn resolve_model<'a, T>(
+    models: &'a IndexMap<String, Arc<T>>,
+    model_id: &str,
+) -> Result<Arc<T>, String> {
+    models.get(model_id).or_else(
+        || models.get(&strip_model_from_finetune(model_id))
+    ).cloned().ok_or(format!("Model '{}' not found. Server has the following models: {:?}", model_id, models.keys()))
+}
+
+pub fn resolve_chat_model<'a>(
+    caps: Arc<CodeAssistantCaps>,
+    requested_model_id: &str,
+) -> Result<Arc<ChatModelRecord>, String> {
+    let model_id = if !requested_model_id.is_empty() {
+        requested_model_id
+    } else {
+        &caps.defaults.chat_default_model
+    };
+    resolve_model(&caps.chat_models, model_id)
+}
+
+pub fn resolve_completion_model<'a>(
+    caps: Arc<CodeAssistantCaps>,
+    requested_model_id: &str,
+    try_refact_fallbacks: bool,
+) -> Result<Arc<CompletionModelRecord>, String> {
+    let model_id = if !requested_model_id.is_empty() {
+        requested_model_id
+    } else {
+        &caps.defaults.completion_default_model
+    };
+
+    match resolve_model(&caps.completion_models, model_id) {
+        Ok(model) => Ok(model),
+        Err(first_err) if try_refact_fallbacks => {
+            if let Ok(model) = resolve_model(&caps.completion_models, &format!("refact/{model_id}")) {
+                return Ok(model);
+            }
+            if let Ok(model) = resolve_model(&caps.completion_models, &format!("refact_self_hosted/{model_id}")) {
+                return Ok(model);
+            }
+            Err(first_err)
+        }
+        Err(err) => Err(err),
+    }
+}
+
+pub fn is_cloud_model(model_id: &str) -> bool {
+    model_id.starts_with("refact/")
+}
diff --git a/refact-agent/engine/src/caps/mod.rs b/refact-agent/engine/src/caps/mod.rs
new file mode 100644
index 000000000..bc3e848db
--- /dev/null
+++ b/refact-agent/engine/src/caps/mod.rs
@@ -0,0 +1,5 @@
+pub mod caps;
+pub mod providers;
+pub mod self_hosted;
+
+pub use caps::*;
diff --git a/refact-agent/engine/src/caps/providers.rs b/refact-agent/engine/src/caps/providers.rs
new file mode 100644
index 000000000..f8a2b06f8
--- /dev/null
+++ b/refact-agent/engine/src/caps/providers.rs
@@ -0,0 +1,676 @@
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, OnceLock};
+
+use indexmap::IndexMap;
+use serde::{Deserialize, Serialize};
+use tokio::sync::RwLock as ARwLock;
+use structopt::StructOpt;
+
+use crate::caps::{
+    BaseModelRecord, ChatModelRecord, CodeAssistantCaps, CompletionModelRecord, DefaultModels,
+    EmbeddingModelRecord, HasBaseModelRecord, default_embedding_batch, default_rejection_threshold,
+    load_caps_value_from_url, resolve_relative_urls, strip_model_from_finetune, normalize_string
+};
+use crate::custom_error::{MapErrToString, YamlError};
+use crate::global_context::{CommandLine, GlobalContext};
+use crate::caps::self_hosted::SelfHostedCaps;
+
+#[derive(Debug, Serialize, Deserialize, Clone, Default)]
+pub struct CapsProvider {
+    #[serde(alias = "cloud_name", default, deserialize_with = "normalize_string")]
+    pub name: String,
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+    #[serde(default = "default_true")]
+    pub supports_completion: bool,
+
+    #[serde(default = "default_endpoint_style")]
+    pub endpoint_style: String,
+
+    // These aliases are for backward compatibility with cloud and self-hosted caps
+    #[serde(default, alias = "endpoint_template")]
+    pub completion_endpoint: String,
+    #[serde(default, alias = "endpoint_chat_passthrough")]
+    pub chat_endpoint: String,
+    #[serde(default, alias = "endpoint_embeddings_template")]
+    pub embedding_endpoint: String,
+
+    #[serde(default)]
+    pub api_key: String,
+
+    #[serde(default)]
+    pub tokenizer_api_key: String,
+
+    #[serde(default)]
+    pub code_completion_n_ctx: usize,
+
+    #[serde(default)]
+    pub support_metadata: bool,
+
+    #[serde(default)]
+    pub completion_models: IndexMap<String, CompletionModelRecord>,
+    #[serde(default)]
+    pub chat_models: IndexMap<String, ChatModelRecord>,
+    #[serde(default, alias = "default_embeddings_model")]
+    pub embedding_model: EmbeddingModelRecord,
+
+    #[serde(default)]
+    pub models_dict_patch: IndexMap<String, serde_json::Value>,  // Used to patch some params from cloud, like n_ctx for pro/free users
+
+    #[serde(flatten)]
+    pub defaults: DefaultModels,
+
+    #[serde(default)]
+    pub running_models: Vec<String>,
+}
+
+impl CapsProvider {
+    pub fn apply_override(&mut self, value: serde_yaml::Value) -> Result<(), String> {
+        set_field_if_exists::<bool>(&mut self.enabled, "enabled", &value)?;
+        set_field_if_exists::<String>(&mut self.endpoint_style, "endpoint_style", &value)?;
+        set_field_if_exists::<String>(&mut self.completion_endpoint, "completion_endpoint", &value)?;
+        set_field_if_exists::<String>(&mut self.chat_endpoint, "chat_endpoint", &value)?;
+        set_field_if_exists::<String>(&mut self.embedding_endpoint, "embedding_endpoint", &value)?;
+        set_field_if_exists::<String>(&mut self.api_key, "api_key", &value)?;
+        set_field_if_exists::<String>(&mut self.tokenizer_api_key, "tokenizer_api_key", &value)?;
+        set_field_if_exists::<EmbeddingModelRecord>(&mut self.embedding_model, "embedding_model", &value)?;
+        if value.get("embedding_model").is_some() {
+            self.embedding_model.base.removable = true;
+            self.embedding_model.base.user_configured = true;
+        }
+
+        extend_model_collection::<ChatModelRecord>(&mut self.chat_models, "chat_models", &value, &self.running_models)?;
+        extend_model_collection::<CompletionModelRecord>(&mut self.completion_models, "completion_models", &value, &self.running_models)?;
+        extend_collection::<Vec<String>>(&mut self.running_models, "running_models", &value)?;
+
+        match serde_yaml::from_value::<DefaultModels>(value) {
+            Ok(default_models) => {
+                self.defaults.apply_override(&default_models, None);
+            },
+            Err(e) => return Err(e.to_string()),
+        }
+
+        Ok(())
+    }
+}
+
+fn set_field_if_exists<T: for<'de> serde::Deserialize<'de>>(
+    target: &mut T, field: &str, value: &serde_yaml::Value
+) -> Result<(), String> {
+    if let Some(val) = value.get(field) {
+        *target = serde_yaml::from_value(val.clone())
+            .map_err(|_| format!("Field '{}' has incorrect type", field))?;
+    }
+    Ok(())
+}
+
+fn extend_collection<C: for<'de> serde::Deserialize<'de> + IntoIterator + Extend<C::Item>>(
+    target: &mut C, field: &str, value: &serde_yaml::Value
+) -> Result<(), String> {
+    if let Some(value) = value.get(field) {
+        let imported_collection = serde_yaml::from_value::<C>(value.clone())
+            .map_err(|_| format!("Invalid format for {field}"))?;
+
+        target.extend(imported_collection);
+    }
+    Ok(())
+}
+
+// Special implementation for ChatModelRecord and CompletionModelRecord collections
+// that sets removable=true for newly added models
+fn extend_model_collection<T: for<'de> serde::Deserialize<'de> + HasBaseModelRecord>(
+    target: &mut IndexMap<String, T>, field: &str, value: &serde_yaml::Value, prev_running_models: &Vec<String>
+) -> Result<(), String> {
+    if let Some(value) = value.get(field) {
+        let imported_collection = serde_yaml::from_value::<IndexMap<String, T>>(value.clone())
+            .map_err(|_| format!("Invalid format for {field}"))?;
+
+        for (key, mut model) in imported_collection {
+            model.base_mut().user_configured = true;
+            if !target.contains_key(&key) && !prev_running_models.contains(&key) {
+                model.base_mut().removable = true;
+            }
+            target.insert(key, model);
+        }
+    }
+    Ok(())
+}
+
+fn default_endpoint_style() -> String { "openai".to_string() }
+
+fn default_true() -> bool { true }
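+// A minimal override sketch, assuming the "openai" template and a hypothetical user
+// file; only keys present in the yaml replace template fields, and any chat_models /
+// completion_models entries that are new get removable=true and user_configured=true:
+//     let user_yaml: serde_yaml::Value = serde_yaml::from_str(r#"
+//         api_key: "$OPENAI_API_KEY"
+//         chat_default_model: gpt-4o
+//     "#).unwrap();
+//     provider.apply_override(user_yaml)?;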
"default_embedding_batch")] + embedding_batch: usize, + } + + match Input::deserialize(deserializer)? { + Input::String(name) => Ok(EmbeddingModelRecord { + base: BaseModelRecord { name, ..Default::default() }, + ..Default::default() + }), + Input::Full(mut helper) => { + if helper.embedding_batch > 256 { + tracing::warn!("embedding_batch can't be higher than 256"); + helper.embedding_batch = default_embedding_batch(); + } + + Ok(EmbeddingModelRecord { + base: helper.base, + embedding_batch: helper.embedding_batch, + rejection_threshold: helper.rejection_threshold, + embedding_size: helper.embedding_size, + }) + }, + } + } +} + +#[derive(Deserialize, Default, Debug)] +pub struct ModelDefaultSettingsUI { + #[serde(default)] + pub chat: ChatModelRecord, + #[serde(default)] + pub completion: CompletionModelRecord, + #[serde(default)] + pub embedding: EmbeddingModelRecord, +} + +const PROVIDER_TEMPLATES: &[(&str, &str)] = &[ + ("anthropic", include_str!("../yaml_configs/default_providers/anthropic.yaml")), + ("custom", include_str!("../yaml_configs/default_providers/custom.yaml")), + ("deepseek", include_str!("../yaml_configs/default_providers/deepseek.yaml")), + ("google_gemini", include_str!("../yaml_configs/default_providers/google_gemini.yaml")), + ("groq", include_str!("../yaml_configs/default_providers/groq.yaml")), + ("lmstudio", include_str!("../yaml_configs/default_providers/lmstudio.yaml")), + ("ollama", include_str!("../yaml_configs/default_providers/ollama.yaml")), + ("openai", include_str!("../yaml_configs/default_providers/openai.yaml")), + ("openrouter", include_str!("../yaml_configs/default_providers/openrouter.yaml")), + ("xai", include_str!("../yaml_configs/default_providers/xai.yaml")), +]; +static PARSED_PROVIDERS: OnceLock> = OnceLock::new(); +static PARSED_MODEL_DEFAULTS: OnceLock> = OnceLock::new(); + +pub fn get_provider_templates() -> &'static IndexMap { + PARSED_PROVIDERS.get_or_init(|| { + let mut map = IndexMap::new(); + for (name, yaml) in PROVIDER_TEMPLATES { + if let Ok(mut provider) = serde_yaml::from_str::(yaml) { + provider.name = name.to_string(); + map.insert(name.to_string(), provider); + } else { + panic!("Failed to parse template for provider {}", name); + } + } + map + }) +} + +pub fn get_provider_model_default_settings_ui() -> &'static IndexMap { + PARSED_MODEL_DEFAULTS.get_or_init(|| { + let mut map = IndexMap::new(); + for (name, yaml) in PROVIDER_TEMPLATES { + let yaml_value = serde_yaml::from_str::(yaml) + .unwrap_or_else(|_| panic!("Failed to parse YAML for provider {}", name)); + + let model_default_settings_ui_value = yaml_value.get("model_default_settings_ui").cloned() + .expect(&format!("Missing `model_model_default_settings_ui` for provider template {name}")); + let model_default_settings_ui = serde_yaml::from_value(model_default_settings_ui_value) + .unwrap_or_else(|e| panic!("Failed to parse model_defaults for provider {}: {}", name, e)); + + map.insert(name.to_string(), model_default_settings_ui); + } + map + }) +} + +/// Returns yaml files from providers.d directory, and list of errors from reading +/// directory or listing files +pub async fn get_provider_yaml_paths(config_dir: &Path) -> (Vec, Vec) { + let providers_dir = config_dir.join("providers.d"); + let mut yaml_paths = Vec::new(); + let mut errors = Vec::new(); + + let mut entries = match tokio::fs::read_dir(&providers_dir).await { + Ok(entries) => entries, + Err(e) => { + errors.push(format!("Failed to read providers directory: {e}")); + return (yaml_paths, errors); + } + }; + + 
+/// Returns yaml files from the providers.d directory, and a list of errors from
+/// reading the directory or listing files
+pub async fn get_provider_yaml_paths(config_dir: &Path) -> (Vec<PathBuf>, Vec<String>) {
+    let providers_dir = config_dir.join("providers.d");
+    let mut yaml_paths = Vec::new();
+    let mut errors = Vec::new();
+
+    let mut entries = match tokio::fs::read_dir(&providers_dir).await {
+        Ok(entries) => entries,
+        Err(e) => {
+            errors.push(format!("Failed to read providers directory: {e}"));
+            return (yaml_paths, errors);
+        }
+    };
+
+    while let Some(entry_result) = entries.next_entry().await.transpose() {
+        match entry_result {
+            Ok(entry) => {
+                let path = entry.path();
+
+                if path.is_file() &&
+                    path.extension().map_or(false, |ext| ext == "yaml" || ext == "yml") {
+                    yaml_paths.push(path);
+                }
+            },
+            Err(e) => {
+                errors.push(format!("Error reading directory entry: {e}"));
+            }
+        }
+    }
+
+    (yaml_paths, errors)
+}
+
+pub fn post_process_provider(provider: &mut CapsProvider, include_disabled_models: bool) {
+    add_running_models(provider);
+    populate_model_records(provider);
+    apply_models_dict_patch(provider);
+    add_name_and_id_to_model_records(provider);
+    if !include_disabled_models {
+        provider.chat_models.retain(|_, model| model.base.enabled);
+        provider.completion_models.retain(|_, model| model.base.enabled);
+    }
+}
+
+pub async fn read_providers_d(
+    prev_providers: Vec<CapsProvider>,
+    config_dir: &Path
+) -> (Vec<CapsProvider>, Vec<YamlError>) {
+    let providers_dir = config_dir.join("providers.d");
+    let mut providers = prev_providers;
+    let mut error_log = Vec::new();
+
+    let (yaml_paths, read_errors) = get_provider_yaml_paths(config_dir).await;
+    for error in read_errors {
+        error_log.push(YamlError {
+            path: providers_dir.to_string_lossy().to_string(),
+            error_line: 0,
+            error_msg: error.to_string(),
+        });
+    }
+
+    let provider_templates = get_provider_templates();
+
+    for yaml_path in yaml_paths {
+        let provider_name = match yaml_path.file_stem() {
+            Some(name) => name.to_string_lossy().to_string(),
+            None => continue,
+        };
+
+        if provider_templates.contains_key(&provider_name) {
+            match get_provider_from_template_and_config_file(config_dir, &provider_name, false, false).await {
+                Ok(provider) => {
+                    providers.push(provider);
+                },
+                Err(e) => {
+                    error_log.push(YamlError {
+                        path: yaml_path.to_string_lossy().to_string(),
+                        error_line: 0,
+                        error_msg: e,
+                    });
+                }
+            }
+        } else {
+            let content = match tokio::fs::read_to_string(&yaml_path).await {
+                Ok(content) => content,
+                Err(e) => {
+                    error_log.push(YamlError {
+                        path: yaml_path.to_string_lossy().to_string(),
+                        error_line: 0,
+                        error_msg: format!("Failed to read file: {}", e),
+                    });
+                    continue;
+                }
+            };
+
+            let mut provider: CapsProvider = match serde_yaml::from_str(&content) {
+                Ok(provider) => provider,
+                Err(e) => {
+                    error_log.push(YamlError {
+                        path: yaml_path.to_string_lossy().to_string(),
+                        error_line: e.location().map_or(0, |loc| loc.line()),
+                        error_msg: format!("Failed to parse YAML: {}", e),
+                    });
+                    continue;
+                }
+            };
+            provider.name = provider_name;
+            providers.push(provider);
+        }
+    }
+
+    (providers, error_log)
+}
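+// Naming convention sketch: the file stem selects the code path in read_providers_d.
+// providers.d/openai.yaml overlays the built-in "openai" template, while a stem with
+// no template, say providers.d/my_llm.yaml (name assumed), must be a complete
+// standalone CapsProvider:
+//     let (providers, errors) = read_providers_d(Vec::new(), &config_dir).await;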
+fn add_running_models(provider: &mut CapsProvider) {
+    let models_to_add = vec![
+        &provider.defaults.chat_default_model,
+        &provider.defaults.chat_light_model,
+        &provider.defaults.chat_thinking_model,
+        &provider.defaults.completion_default_model,
+    ];
+
+    for model in models_to_add {
+        if !model.is_empty() && !provider.running_models.contains(model) {
+            provider.running_models.push(model.clone());
+        }
+    }
+}
+
+/// Returns the latest modification timestamp in seconds of any YAML file in the providers.d directory
+pub async fn get_latest_provider_mtime(config_dir: &Path) -> Option<u64> {
+    let (yaml_paths, reading_errors) = get_provider_yaml_paths(config_dir).await;
+
+    for error in reading_errors {
+        tracing::error!("{error}");
+    }
+
+    let mut latest_mtime = None;
+    for path in yaml_paths {
+        match tokio::fs::metadata(&path).await {
+            Ok(metadata) => {
+                if let Ok(mtime) = metadata.modified() {
+                    latest_mtime = match latest_mtime {
+                        Some(current_latest) if mtime > current_latest => Some(mtime),
+                        None => Some(mtime),
+                        _ => latest_mtime,
+                    };
+                }
+            },
+            Err(e) => {
+                tracing::error!("Failed to get metadata for {}: {}", path.display(), e);
+            }
+        }
+    }
+
+    latest_mtime.map(|mtime| mtime.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs())
+}
+
+pub fn add_models_to_caps(caps: &mut CodeAssistantCaps, providers: Vec<CapsProvider>) {
+    fn add_provider_details_to_model(base_model_rec: &mut BaseModelRecord, provider: &CapsProvider, model_name: &str, endpoint: &str) {
+        base_model_rec.api_key = provider.api_key.clone();
+        base_model_rec.tokenizer_api_key = provider.tokenizer_api_key.clone();
+        base_model_rec.endpoint = endpoint.replace("$MODEL", model_name);
+        base_model_rec.support_metadata = provider.support_metadata;
+        base_model_rec.endpoint_style = provider.endpoint_style.clone();
+    }
+
+    for mut provider in providers {
+
+        let completion_models = std::mem::take(&mut provider.completion_models);
+        for (model_name, mut model_rec) in completion_models {
+            if model_rec.base.endpoint.is_empty() {
+                add_provider_details_to_model(
+                    &mut model_rec.base, &provider, &model_name, &provider.completion_endpoint
+                );
+
+                if provider.code_completion_n_ctx > 0 && provider.code_completion_n_ctx < model_rec.base.n_ctx {
+                    // the model is capable of more, but the server or provider may limit it, e.g. for latency
+                    model_rec.base.n_ctx = provider.code_completion_n_ctx;
+                }
+            }
+
+            caps.completion_models.insert(model_rec.base.id.clone(), Arc::new(model_rec));
+        }
+
+        let chat_models = std::mem::take(&mut provider.chat_models);
+        for (model_name, mut model_rec) in chat_models {
+            if model_rec.base.endpoint.is_empty() {
+                add_provider_details_to_model(
+                    &mut model_rec.base, &provider, &model_name, &provider.chat_endpoint
+                );
+            }
+
+            caps.chat_models.insert(model_rec.base.id.clone(), Arc::new(model_rec));
+        }
+
+        if provider.embedding_model.is_configured() && provider.embedding_model.base.enabled {
+            let mut embedding_model = std::mem::take(&mut provider.embedding_model);
+
+            if embedding_model.base.endpoint.is_empty() {
+                let model_name = embedding_model.base.name.clone();
+                add_provider_details_to_model(
+                    &mut embedding_model.base, &provider, &model_name, &provider.embedding_endpoint
+                );
+            }
+            caps.embedding_model = embedding_model;
+        }
+
+        caps.defaults.apply_override(&provider.defaults, Some(&provider.name));
+    }
+}
+
+fn add_name_and_id_to_model_records(provider: &mut CapsProvider) {
+    for (model_name, model_rec) in &mut provider.completion_models {
+        model_rec.base.name = model_name.to_string();
+        model_rec.base.id = format!("{}/{}", provider.name, model_name);
+    }
+
+    for (model_name, model_rec) in &mut provider.chat_models {
+        model_rec.base.name = model_name.to_string();
+        model_rec.base.id = format!("{}/{}", provider.name, model_name);
+    }
+
+    if provider.embedding_model.is_configured() {
+        provider.embedding_model.base.id = format!("{}/{}", provider.name, provider.embedding_model.base.name);
+    }
+}
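+// models_dict_patch sketch (payload assumed): the cloud can send a per-model patch
+// such as {"gpt-4o": {"n_ctx": 64000}}; apply_models_dict_patch below copies the
+// patched n_ctx (and, for chat models, supports_tools / supports_multimodality)
+// onto already-populated records.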
rec_patched.get("supports_tools").and_then(|v| v.as_bool()) { + chat_rec.supports_tools = supports_tools; + } + if let Some(supports_multimodality) = rec_patched.get("supports_multimodality").and_then(|v| v.as_bool()) { + chat_rec.supports_multimodality = supports_multimodality; + } + } + } +} + +#[derive(Deserialize)] +pub struct KnownModels { + pub completion_models: IndexMap, + pub chat_models: IndexMap, + pub embedding_models: IndexMap, +} +const UNPARSED_KNOWN_MODELS: &'static str = include_str!("../known_models.json"); +static KNOWN_MODELS: OnceLock = OnceLock::new(); + +pub fn get_known_models() -> &'static KnownModels { + KNOWN_MODELS.get_or_init(|| { + serde_json::from_str::(UNPARSED_KNOWN_MODELS).map_err(|e| { + let up_to_line = UNPARSED_KNOWN_MODELS.lines().take(e.line()).collect::>().join("\n"); + panic!("{}\nfailed to parse KNOWN_MODELS: {}", up_to_line, e); + }).unwrap() + }) +} + +fn populate_model_records(provider: &mut CapsProvider) { + let known_models = get_known_models(); + + for model_name in &provider.running_models { + if !provider.completion_models.contains_key(model_name) { + if let Some(model_rec) = find_model_match(model_name, &provider.completion_models, &known_models.completion_models) { + provider.completion_models.insert(model_name.clone(), model_rec); + } + } + + if !provider.chat_models.contains_key(model_name) { + if let Some(model_rec) = find_model_match(model_name, &provider.chat_models, &known_models.chat_models) { + provider.chat_models.insert(model_name.clone(), model_rec); + } + } + } + + for model in &provider.running_models { + if !provider.completion_models.contains_key(model) && + !provider.chat_models.contains_key(model) && + !(model == &provider.embedding_model.base.name) { + tracing::warn!("Indicated as running, unknown model {:?} for provider {}, maybe update this rust binary", model, provider.name); + } + } + + if !provider.embedding_model.is_configured() && !provider.embedding_model.base.name.is_empty() { + let model_name = provider.embedding_model.base.name.clone(); + if let Some(model_rec) = find_model_match(&model_name, &IndexMap::new(), &known_models.embedding_models) { + provider.embedding_model = model_rec; + provider.embedding_model.base.name = model_name; + } else { + tracing::warn!("Unknown embedding model '{}', maybe configure it or update this binary", model_name); + } + } +} + +fn find_model_match( + model_name: &String, + provider_models: &IndexMap, + known_models: &IndexMap +) -> Option { + let model_stripped = strip_model_from_finetune(model_name); + + if let Some(model) = provider_models.get(model_name) + .or_else(|| provider_models.get(&model_stripped)) { + return Some(model.clone()); + } + + for model in provider_models.values() { + if model.base().similar_models.contains(model_name) || + model.base().similar_models.contains(&model_stripped) { + return Some(model.clone()); + } + } + + if let Some(model) = known_models.get(model_name) + .or_else(|| known_models.get(&model_stripped)) { + return Some(model.clone()); + } + + for model in known_models.values() { + if model.base().similar_models.contains(&model_name.to_string()) || + model.base().similar_models.contains(&model_stripped) { + return Some(model.clone()); + } + } + + None +} + +pub fn resolve_api_key(provider: &CapsProvider, key: &str, fallback: &str, key_name: &str) -> String { + match key { + k if k.is_empty() => fallback.to_string(), + k if k.starts_with("$") => { + match std::env::var(&k[1..]) { + Ok(env_val) => env_val, + Err(e) => { + tracing::error!( + "tried 
+pub fn resolve_api_key(provider: &CapsProvider, key: &str, fallback: &str, key_name: &str) -> String {
+    match key {
+        k if k.is_empty() => fallback.to_string(),
+        k if k.starts_with("$") => {
+            match std::env::var(&k[1..]) {
+                Ok(env_val) => env_val,
+                Err(e) => {
+                    tracing::error!(
+                        "tried to read {} from env var {} for provider {}, but failed: {}",
+                        key_name, k, provider.name, e
+                    );
+                    fallback.to_string()
+                }
+            }
+        }
+        k => k.to_string(),
+    }
+}
+
+pub fn resolve_provider_api_key(provider: &CapsProvider, cmdline_api_key: &str) -> String {
+    resolve_api_key(provider, &provider.api_key, &cmdline_api_key, "API key")
+}
+
+pub fn resolve_tokenizer_api_key(provider: &CapsProvider) -> String {
+    resolve_api_key(provider, &provider.tokenizer_api_key, "", "tokenizer API key")
+}
+
+pub async fn get_provider_from_template_and_config_file(
+    config_dir: &Path, name: &str, config_file_must_exist: bool, post_process: bool
+) -> Result<CapsProvider, String> {
+    let mut provider = get_provider_templates().get(name).cloned()
+        .ok_or("Provider template not found")?;
+
+    let provider_path = config_dir.join("providers.d").join(format!("{name}.yaml"));
+    let config_file_value = match tokio::fs::read_to_string(&provider_path).await {
+        Ok(content) => {
+            serde_yaml::from_str::<serde_yaml::Value>(&content)
+                .map_err_with_prefix(format!("Error parsing file {}:", provider_path.display()))?
+        },
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound && !config_file_must_exist => {
+            serde_yaml::Value::Mapping(serde_yaml::Mapping::new())
+        },
+        Err(e) => {
+            return Err(format!("Failed to read file {}: {}", provider_path.display(), e));
+        }
+    };
+
+    provider.apply_override(config_file_value)?;
+
+    if post_process {
+        post_process_provider(&mut provider, true);
+    }
+
+    Ok(provider)
+}
+
+pub async fn get_provider_from_server(gcx: Arc<ARwLock<GlobalContext>>) -> Result<CapsProvider, String> {
+    let command_line = CommandLine::from_args();
+    let cmdline_api_key = command_line.api_key.clone();
+    let (caps_value, caps_url) = load_caps_value_from_url(command_line, gcx.clone()).await?;
+
+    if let Ok(self_hosted_caps) = serde_json::from_value::<SelfHostedCaps>(caps_value.clone()) {
+        let mut provider = self_hosted_caps.into_provider(&caps_url, &cmdline_api_key)?;
+        post_process_provider(&mut provider, true);
+        provider.api_key = resolve_provider_api_key(&provider, &cmdline_api_key);
+        provider.tokenizer_api_key = resolve_tokenizer_api_key(&provider);
+        Ok(provider)
+    } else {
+        let mut provider = serde_json::from_value::<CapsProvider>(caps_value).map_err_to_string()?;
+
+        resolve_relative_urls(&mut provider, &caps_url)?;
+        post_process_provider(&mut provider, true);
+        provider.api_key = resolve_provider_api_key(&provider, &cmdline_api_key);
+        provider.tokenizer_api_key = resolve_tokenizer_api_key(&provider);
+        Ok(provider)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_provider_templates() {
+        let _ = get_provider_templates();  // This will panic if any template fails to parse
+    }
+
+    #[test]
+    fn test_parse_known_models() {
+        let _ = get_known_models();  // This will panic if any model fails to parse
+    }
+}
diff --git a/refact-agent/engine/src/caps/self_hosted.rs b/refact-agent/engine/src/caps/self_hosted.rs
new file mode 100644
index 000000000..6bedfa265
--- /dev/null
+++ b/refact-agent/engine/src/caps/self_hosted.rs
@@ -0,0 +1,370 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use indexmap::IndexMap;
+use serde::Deserialize;
+
+use crate::caps::{
+    BaseModelRecord, ChatModelRecord, CodeAssistantCaps, CompletionModelRecord, DefaultModels,
+    EmbeddingModelRecord, default_chat_scratchpad, default_completion_scratchpad,
+    default_completion_scratchpad_patch, default_embedding_batch, default_hf_tokenizer_template,
+    default_rejection_threshold, relative_to_full_url, normalize_string, resolve_relative_urls
+};
+use crate::caps::providers;
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsModelRecord {
+    pub n_ctx: usize,
+
+    #[serde(default)]
+    pub supports_scratchpads: HashMap<String, serde_json::Value>,
+
+    #[serde(default)]
+    pub supports_tools: bool,
+
+    #[serde(default)]
+    pub supports_multimodality: bool,
+
+    #[serde(default)]
+    pub supports_clicks: bool,
+
+    #[serde(default)]
+    pub supports_agent: bool,
+
+    #[serde(default)]
+    pub supports_reasoning: Option<String>,
+
+    #[serde(default)]
+    pub supports_boost_reasoning: bool,
+
+    #[serde(default)]
+    pub default_temperature: Option<f32>,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsEmbeddingModelRecord {
+    pub n_ctx: usize,
+    pub size: i32,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsCompletion {
+    pub endpoint: String,
+    pub models: IndexMap<String, SelfHostedCapsModelRecord>,
+    pub default_model: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsChat {
+    pub endpoint: String,
+    pub models: IndexMap<String, SelfHostedCapsModelRecord>,
+    pub default_model: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsEmbedding {
+    pub endpoint: String,
+    pub models: IndexMap<String, SelfHostedCapsEmbeddingModelRecord>,
+    pub default_model: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCapsTelemetryEndpoints {
+    pub telemetry_basic_endpoint: String,
+    pub telemetry_basic_retrieve_my_own_endpoint: String,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct SelfHostedCaps {
+    #[serde(deserialize_with = "normalize_string")]
+    pub cloud_name: String,
+
+    pub completion: SelfHostedCapsCompletion,
+    pub chat: SelfHostedCapsChat,
+    pub embedding: SelfHostedCapsEmbedding,
+
+    pub telemetry_endpoints: SelfHostedCapsTelemetryEndpoints,
+    pub tokenizer_endpoints: HashMap<String, String>,
+
+    #[serde(default)]
+    pub customization: String,
+    pub caps_version: i64,
+}
+
+fn configure_base_model(
+    base_model: &mut BaseModelRecord,
+    model_name: &str,
+    endpoint: &str,
+    cloud_name: &str,
+    tokenizer_endpoints: &HashMap<String, String>,
+    caps_url: &String,
+    cmdline_api_key: &str,
+) -> Result<(), String> {
+    base_model.name = model_name.to_string();
+    base_model.id = format!("{}/{}", cloud_name, model_name);
+    if base_model.endpoint.is_empty() {
+        base_model.endpoint = relative_to_full_url(caps_url, &endpoint.replace("$MODEL", model_name))?;
+    }
+    if let Some(tokenizer) = tokenizer_endpoints.get(&base_model.name) {
+        base_model.tokenizer = relative_to_full_url(caps_url, &tokenizer)?;
+    }
+    base_model.api_key = cmdline_api_key.to_string();
+    base_model.endpoint_style = "openai".to_string();
+    Ok(())
+}
+
+impl SelfHostedCapsModelRecord {
+    fn get_completion_scratchpad(&self) -> (String, serde_json::Value) {
+        if !self.supports_scratchpads.is_empty() {
+            let scratchpad_name = self.supports_scratchpads.keys().next().unwrap_or(&default_completion_scratchpad()).clone();
+            let scratchpad_patch = self.supports_scratchpads.values().next().unwrap_or(&serde_json::Value::Null).clone();
+            (scratchpad_name, scratchpad_patch)
+        } else {
+            (default_completion_scratchpad(), default_completion_scratchpad_patch())
+        }
+    }
+
+    fn get_chat_scratchpad(&self) -> (String, serde_json::Value) {
+        if !self.supports_scratchpads.is_empty() {
+            let scratchpad_name = self.supports_scratchpads.keys().next().unwrap_or(&default_chat_scratchpad()).clone();
+            let scratchpad_patch = self.supports_scratchpads.values().next().unwrap_or(&serde_json::Value::Null).clone();
+            (scratchpad_name, scratchpad_patch)
+        } else {
+            (default_chat_scratchpad(), serde_json::Value::Null)
+        }
+    }
+
+    pub fn into_completion_model(
+        &self,
+        model_name: &str,
+        self_hosted_caps: &SelfHostedCaps,
+        caps_url: &String,
+        cmdline_api_key: &str,
+    ) -> Result<CompletionModelRecord, String> {
+        let mut base = BaseModelRecord {
+            n_ctx: self.n_ctx,
+            enabled: true,
+            ..Default::default()
+        };
+
+        configure_base_model(
+            &mut base,
+            model_name,
+            &self_hosted_caps.completion.endpoint,
+            &self_hosted_caps.cloud_name,
+            &self_hosted_caps.tokenizer_endpoints,
+            caps_url,
+            cmdline_api_key,
+        )?;
+
+        let (scratchpad, scratchpad_patch) = self.get_completion_scratchpad();
+
+        Ok(CompletionModelRecord {
+            base,
+            scratchpad,
+            scratchpad_patch,
+            model_family: None,
+        })
+    }
+}
+
+impl SelfHostedCapsModelRecord {
+    pub fn into_chat_model(
+        &self,
+        model_name: &str,
+        self_hosted_caps: &SelfHostedCaps,
+        caps_url: &String,
+        cmdline_api_key: &str,
+    ) -> Result<ChatModelRecord, String> {
+        let mut base = BaseModelRecord {
+            n_ctx: self.n_ctx,
+            enabled: true,
+            ..Default::default()
+        };
+
+        let (scratchpad, scratchpad_patch) = self.get_chat_scratchpad();
+
+        // Non-passthrough models don't support `/v1/chat/completions` endpoints in openai style, only `/v1/completions`
+        let endpoint_to_use = if scratchpad == "PASSTHROUGH" {
+            &self_hosted_caps.chat.endpoint
+        } else {
+            &self_hosted_caps.completion.endpoint
+        };
+
+        configure_base_model(
+            &mut base,
+            model_name,
+            endpoint_to_use,
+            &self_hosted_caps.cloud_name,
+            &self_hosted_caps.tokenizer_endpoints,
+            caps_url,
+            cmdline_api_key,
+        )?;
+
+        Ok(ChatModelRecord {
+            base,
+            scratchpad,
+            scratchpad_patch,
+            supports_tools: self.supports_tools,
+            supports_multimodality: self.supports_multimodality,
+            supports_clicks: self.supports_clicks,
+            supports_agent: self.supports_agent,
+            supports_reasoning: self.supports_reasoning.clone(),
+            supports_boost_reasoning: self.supports_boost_reasoning,
+            default_temperature: self.default_temperature,
+        })
+    }
+}
+
+impl SelfHostedCapsEmbeddingModelRecord {
+    pub fn into_embedding_model(
+        &self,
+        model_name: &str,
+        self_hosted_caps: &SelfHostedCaps,
+        caps_url: &String,
+        cmdline_api_key: &str,
+    ) -> Result<EmbeddingModelRecord, String> {
+        let mut embedding_model = EmbeddingModelRecord {
+            base: BaseModelRecord { n_ctx: self.n_ctx, enabled: true, ..Default::default() },
+            embedding_size: self.size,
+            rejection_threshold: default_rejection_threshold(),
+            embedding_batch: default_embedding_batch(),
+        };
+
+        configure_base_model(
+            &mut embedding_model.base,
+            model_name,
+            &self_hosted_caps.embedding.endpoint,
+            &self_hosted_caps.cloud_name,
+            &self_hosted_caps.tokenizer_endpoints,
+            caps_url,
+            cmdline_api_key,
+        )?;
+
+        Ok(embedding_model)
+    }
+}
+
+
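+// Conversion sketch (names assumed): for a self-hosted server whose normalized
+// cloud_name is "my_server" and chat.default_model is "qwen2.5-coder", into_caps
+// below produces prefixed defaults:
+//     defaults.chat_default_model == "my_server/qwen2.5-coder"
+// chat_light_model gets the same value, and chat_thinking_model stays empty.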
+impl SelfHostedCaps {
+    pub fn into_caps(self, caps_url: &String, cmdline_api_key: &str) -> Result<CodeAssistantCaps, String> {
+        let mut caps = CodeAssistantCaps {
+            cloud_name: self.cloud_name.clone(),
+
+            telemetry_basic_dest: relative_to_full_url(caps_url, &self.telemetry_endpoints.telemetry_basic_endpoint)?,
+            telemetry_basic_retrieve_my_own: relative_to_full_url(caps_url, &self.telemetry_endpoints.telemetry_basic_retrieve_my_own_endpoint)?,
+
+            completion_models: IndexMap::new(),
+            chat_models: IndexMap::new(),
+            embedding_model: EmbeddingModelRecord::default(),
+
+            defaults: DefaultModels {
+                completion_default_model: format!("{}/{}", self.cloud_name, self.completion.default_model),
+                chat_default_model: format!("{}/{}", self.cloud_name, self.chat.default_model),
+                chat_thinking_model: String::new(),
+                chat_light_model: format!("{}/{}", self.cloud_name, self.chat.default_model),
+            },
+            customization: self.customization.clone(),
+            caps_version: self.caps_version,
+
+            hf_tokenizer_template: default_hf_tokenizer_template(),
+        };
+
+        for (model_name, model_rec) in &self.completion.models {
+            let completion_model = model_rec.into_completion_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            caps.completion_models.insert(completion_model.base.id.clone(), Arc::new(completion_model));
+        }
+
+        for (model_name, model_rec) in &self.chat.models {
+            let chat_model = model_rec.into_chat_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            caps.chat_models.insert(chat_model.base.id.clone(), Arc::new(chat_model));
+        }
+
+        if let Some((model_name, model_rec)) = self.embedding.models.get_key_value(&self.embedding.default_model) {
+            let embedding_model = model_rec.into_embedding_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+            caps.embedding_model = embedding_model;
+        }
+
+        Ok(caps)
+    }
+
+    pub fn into_provider(self, caps_url: &String, cmdline_api_key: &str) -> Result<providers::CapsProvider, String> {
+        let mut provider = providers::CapsProvider {
+            name: self.cloud_name.clone(),
+            enabled: true,
+            supports_completion: true,
+            endpoint_style: "openai".to_string(),
+            completion_endpoint: self.completion.endpoint.clone(),
+            chat_endpoint: self.chat.endpoint.clone(),
+            embedding_endpoint: self.embedding.endpoint.clone(),
+            api_key: cmdline_api_key.to_string(),
+            tokenizer_api_key: cmdline_api_key.to_string(),
+            code_completion_n_ctx: 0,
+            support_metadata: false,
+            completion_models: IndexMap::new(),
+            chat_models: IndexMap::new(),
+            embedding_model: EmbeddingModelRecord::default(),
+            models_dict_patch: IndexMap::new(),
+            defaults: DefaultModels {
+                completion_default_model: self.completion.default_model.clone(),
+                chat_default_model: self.chat.default_model.clone(),
+                chat_thinking_model: String::new(),
+                chat_light_model: String::new(),
+            },
+            running_models: Vec::new(),
+        };
+
+        for (model_name, model_rec) in &self.completion.models {
+            let completion_model = model_rec.into_completion_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            provider.completion_models.insert(model_name.clone(), completion_model);
+        }
+
+        for (model_name, model_rec) in &self.chat.models {
+            let chat_model = model_rec.into_chat_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+
+            provider.chat_models.insert(model_name.clone(), chat_model);
+        }
+
+        if let Some((model_name, model_rec)) = self.embedding.models.get_key_value(&self.embedding.default_model) {
+            let embedding_model = model_rec.into_embedding_model(
+                model_name,
+                &self,
+                caps_url,
+                cmdline_api_key,
+            )?;
+            provider.embedding_model = embedding_model;
+        }
+
+        resolve_relative_urls(&mut provider, caps_url)?;
+
+        Ok(provider)
+    }
+}
diff --git a/refact-agent/engine/src/custom_error.rs b/refact-agent/engine/src/custom_error.rs
index 454aef962..191c5b763 100644
--- a/refact-agent/engine/src/custom_error.rs
+++ b/refact-agent/engine/src/custom_error.rs
@@ -1,4 +1,5 @@
 use std::error::Error;
+use serde::Serialize;
 use hyper::StatusCode;
 use serde_json::json;
 use std::fmt;
@@ -52,6 +53,35 @@ impl ScratchError {
     }
 }
 
+#[derive(Serialize, Default)]
+pub struct YamlError {
+    pub path: String,
+    pub error_line: usize,  // starts with 1, zero if invalid
+    pub error_msg: String,
+}
+
+impl From<(&str, &serde_yaml::Error)> for YamlError {
+    fn from((path, err): (&str, &serde_yaml::Error)) -> Self {
+        YamlError {
+            path: path.to_string(),
+            error_line: err.location().map(|loc| loc.line()).unwrap_or(0),
+            error_msg: err.to_string(),
+        }
+    }
+}
+
+impl fmt::Display for YamlError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}:{} {:?}",
+            crate::nicer_logs::last_n_chars(&self.path, 40),
+            self.error_line,
+            self.error_msg
+        )
+    }
+}
+
 pub trait MapErrToString<T> {
     /// Same as .map_err(|e| e.to_string())
     fn map_err_to_string(self) -> Result<T, String>;
diff --git a/refact-agent/engine/src/fetch_embedding.rs b/refact-agent/engine/src/fetch_embedding.rs
index 81a00bd08..38a8b4c45 100644
--- a/refact-agent/engine/src/fetch_embedding.rs
+++ b/refact-agent/engine/src/fetch_embedding.rs
@@ -3,22 +3,20 @@ use std::sync::Arc;
 use tokio::sync::Mutex as AMutex;
 use tracing::error;
 
+use crate::caps::EmbeddingModelRecord;
 use crate::forward_to_hf_endpoint::get_embedding_hf_style;
 use crate::forward_to_openai_endpoint::get_embedding_openai_style;
 
 pub async fn get_embedding(
     client: Arc<AMutex<reqwest::Client>>,
-    endpoint_embeddings_style: &String,
-    model_name: &String,
-    endpoint_template: &String,
+    embedding_model: &EmbeddingModelRecord,
     text: Vec<String>,
-    api_key: &String,
 ) -> Result<Vec<Vec<f32>>, String> {
-    match endpoint_embeddings_style.to_lowercase().as_str() {
-        "hf" => get_embedding_hf_style(client, text, endpoint_template, model_name, api_key).await,
-        "openai" => get_embedding_openai_style(client, text, endpoint_template, model_name, api_key).await,
+    match embedding_model.base.endpoint_style.to_lowercase().as_str() {
+        "hf" => get_embedding_hf_style(client, text, embedding_model).await,
+        "openai" => get_embedding_openai_style(client, text, embedding_model).await,
         _ => {
-            error!("Invalid endpoint_embeddings_style: {}", endpoint_embeddings_style);
+            error!("Invalid endpoint_embeddings_style: {}", embedding_model.base.endpoint_style);
             Err("Invalid endpoint_embeddings_style".to_string())
         }
     }
@@ -29,13 +27,10 @@ const SLEEP_ON_BATCH_ONE: u64 = 100;
 
 // HF often returns 500 errors for no reason
-pub async fn get_embedding_with_retry(
+pub async fn get_embedding_with_retries(
     client: Arc<AMutex<reqwest::Client>>,
-    endpoint_embeddings_style: &String,
-    model_name: &String,
-    endpoint_template: &String,
+    embedding_model: &EmbeddingModelRecord,
     text: Vec<String>,
-    api_key: &String,
     max_retries: usize,
 ) -> Result<Vec<Vec<f32>>, String> {
     let mut attempt_n = 0;
@@ -43,11 +38,8 @@ pub async fn get_embedding_with_retry(
         attempt_n += 1;
         match get_embedding(
             client.clone(),
-            endpoint_embeddings_style,
-            model_name,
-            endpoint_template,
+            embedding_model,
             text.clone(),
-            api_key,
         ).await {
             Ok(embedding) => return Ok(embedding),
             Err(e) => {
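The embedding path now threads one EmbeddingModelRecord through instead of four loose strings. A call-site sketch (client and caps values assumed, not taken from this diff):

    let vectors = get_embedding_with_retries(
        client.clone(),
        &caps.embedding_model,
        vec!["fn main() {}".to_string()],
        5,
    ).await?;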
diff --git a/refact-agent/engine/src/forward_to_hf_endpoint.rs b/refact-agent/engine/src/forward_to_hf_endpoint.rs
index 86bc8c348..9e830cd9a 100644
--- a/refact-agent/engine/src/forward_to_hf_endpoint.rs
+++ b/refact-agent/engine/src/forward_to_hf_endpoint.rs
@@ -8,27 +8,24 @@ use serde_json::json;
 use tokio::sync::Mutex as AMutex;
 
 use crate::call_validation::{ChatMeta, SamplingParameters};
+use crate::caps::BaseModelRecord;
+use crate::caps::EmbeddingModelRecord;
 
 // Idea: use USER_AGENT
 // let user_agent = format!("{NAME}/{VERSION}; rust/unknown; ide/{ide:?}");
 
 pub async fn forward_to_hf_style_endpoint(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
     sampling_parameters: &SamplingParameters,
     meta: Option<ChatMeta>
 ) -> Result<serde_json::Value, String> {
-    let url = endpoint_template.replace("$MODEL", model_name);
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
     let params_string = serde_json::to_string(sampling_parameters).unwrap();
     let mut params_json = serde_json::from_str::<serde_json::Value>(&params_string).unwrap();
@@ -42,7 +39,7 @@ pub async fn forward_to_hf_style_endpoint(
         data["meta"] = serde_json::to_value(meta).unwrap();
     }
 
-    let req = client.post(&url)
+    let req = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string())
         .send()
@@ -50,34 +47,29 @@ pub async fn forward_to_hf_style_endpoint(
     let resp = req.map_err(|e| format!("{}", e))?;
     let status_code = resp.status().as_u16();
     let response_txt = resp.text().await.map_err(|e|
-        format!("reading from socket {}: {}", url, e)
+        format!("reading from socket {}: {}", model_rec.endpoint, e)
     )?;
     if status_code != 200 {
-        return Err(format!("{} status={} text {}", url, status_code, response_txt));
+        return Err(format!("{} status={} text {}", model_rec.endpoint, status_code, response_txt));
     }
     Ok(match serde_json::from_str(&response_txt) {
         Ok(json) => json,
-        Err(e) => return Err(format!("{}: {}", url, e)),
+        Err(e) => return Err(format!("{}: {}", model_rec.endpoint, e)),
     })
 }
 
 pub async fn forward_to_hf_style_endpoint_streaming(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
     sampling_parameters: &SamplingParameters,
     meta: Option<ChatMeta>
 ) -> Result<EventSource, String> {
-    let url = endpoint_template.replace("$MODEL", model_name);
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
     let params_string = serde_json::to_string(sampling_parameters).unwrap();
     let mut params_json = serde_json::from_str::<serde_json::Value>(&params_string).unwrap();
@@ -92,11 +84,11 @@ pub async fn forward_to_hf_style_endpoint_streaming(
         data["meta"] = serde_json::to_value(meta).unwrap();
     }
 
-    let builder = client.post(&url)
+    let builder = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string());
     let event_source: EventSource = EventSource::new(builder).map_err(|e|
-        format!("can't stream from {}: {}", url, e)
+        format!("can't stream from {}: {}", model_rec.endpoint, e)
     )?;
     Ok(event_source)
 }
@@ -125,16 +117,13 @@ struct EmbeddingsPayloadHF {
 
 pub async fn get_embedding_hf_style(
     client: std::sync::Arc<AMutex<reqwest::Client>>,
     text: Vec<String>,
-    endpoint_template: &String,
-    model_name: &String,
-    api_key: &String,
+    model: &EmbeddingModelRecord,
 ) -> Result<Vec<Vec<f32>>, String> {
     let payload = EmbeddingsPayloadHF { inputs: text, options: EmbeddingsPayloadHFOptions::new() };
-    let url = endpoint_template.clone().replace("$MODEL", &model_name);
 
     let maybe_response = client.lock().await
-        .post(&url)
-        .bearer_auth(api_key.clone())
+        .post(&model.base.endpoint)
+        .bearer_auth(model.base.api_key.clone())
         .json(&payload)
         .send()
         .await;
diff --git a/refact-agent/engine/src/forward_to_openai_endpoint.rs b/refact-agent/engine/src/forward_to_openai_endpoint.rs
index 6721c7aef..8879a2188 100644
--- a/refact-agent/engine/src/forward_to_openai_endpoint.rs
+++ b/refact-agent/engine/src/forward_to_openai_endpoint.rs
@@ -10,33 +10,29 @@ use tokio::sync::Mutex as AMutex;
 use tracing::info;
 
 use crate::call_validation::{ChatMeta, SamplingParameters};
+use crate::caps::BaseModelRecord;
+use crate::custom_error::MapErrToString;
 use crate::scratchpads::chat_utils_limit_history::CompressionStrength;
+use crate::caps::EmbeddingModelRecord;
 
 pub async fn forward_to_openai_style_endpoint(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
-    endpoint_chat_passthrough: &String,
     sampling_parameters: &SamplingParameters,
-    is_metadata_supported: bool,
     meta: Option<ChatMeta>
 ) -> Result<serde_json::Value, String> {
     let is_passthrough = prompt.starts_with("PASSTHROUGH ");
-    let url = if !is_passthrough { endpoint_template.replace("$MODEL", model_name) } else { endpoint_chat_passthrough.clone() };
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
-    if is_metadata_supported {
-        headers.insert(USER_AGENT, HeaderValue::from_str(format!("refact-lsp {}", crate::version::build_info::PKG_VERSION).as_str()).unwrap());
+    if model_rec.support_metadata {
+        headers.insert(USER_AGENT, HeaderValue::from_str(&format!("refact-lsp {}", crate::version::build_info::PKG_VERSION)).unwrap());
     }
     let mut data = json!({
-        "model": model_name,
+        "model": model_rec.name.clone(),
         "stream": false,
     });
     if !sampling_parameters.stop.is_empty() { // openai does not like empty stop
@@ -49,15 +45,15 @@ pub async fn forward_to_openai_style_endpoint(
         data["reasoning_effort"] = serde_json::Value::String(reasoning_effort.to_string());
     } else if let Some(thinking) = sampling_parameters.thinking.clone() {
         data["thinking"] = thinking.clone();
-    } else {
-        data["temperature"] = serde_json::Value::from(sampling_parameters.temperature);
+    } else if let Some(temperature) = sampling_parameters.temperature {
+        data["temperature"] = serde_json::Value::from(temperature);
     }
     data["max_completion_tokens"] = serde_json::Value::from(sampling_parameters.max_new_tokens);
     info!("NOT STREAMING TEMP {}", sampling_parameters.temperature
         .map(|x| x.to_string())
         .unwrap_or("None".to_string()));
     if is_passthrough {
-        passthrough_messages_to_json(&mut data, prompt, model_name);
+        passthrough_messages_to_json(&mut data, prompt, &model_rec.name);
     } else {
         data["prompt"] = serde_json::Value::String(prompt.to_string());
         data["echo"] = serde_json::Value::Bool(false);
@@ -67,23 +63,23 @@ pub async fn forward_to_openai_style_endpoint(
     }
 
     // When cancelling requests, coroutine ususally gets aborted here on the following line.
-    let req = client.post(&url)
+    let req = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string())
         .send()
         .await;
-    let resp = req.map_err(|e| format!("{}", e))?;
+    let resp = req.map_err_to_string()?;
     let status_code = resp.status().as_u16();
     let response_txt = resp.text().await.map_err(|e|
-        format!("reading from socket {}: {}", url, e)
+        format!("reading from socket {}: {}", model_rec.endpoint, e)
     )?;
     // 400 "client error" is likely a json that we rather accept here, pick up error details as we analyse json fields at the level
     // higher, the most often 400 is no such model.
     if status_code != 200 && status_code != 400 {
-        return Err(format!("{} status={} text {}", url, status_code, response_txt));
+        return Err(format!("{} status={} text {}", model_rec.endpoint, status_code, response_txt));
     }
     if status_code != 200 {
-        info!("forward_to_openai_style_endpoint: {} {}\n{}", url, status_code, response_txt);
+        tracing::info!("forward_to_openai_style_endpoint: {} {}\n{}", model_rec.endpoint, status_code, response_txt);
     }
     let parsed_json: serde_json::Value = match serde_json::from_str(&response_txt) {
         Ok(json) => json,
@@ -93,37 +89,30 @@ pub async fn forward_to_openai_style_endpoint(
 }
 
 pub async fn forward_to_openai_style_endpoint_streaming(
-    save_url: &mut String,
-    bearer: String,
-    model_name: &str,
+    model_rec: &BaseModelRecord,
     prompt: &str,
     client: &reqwest::Client,
-    endpoint_template: &String,
-    endpoint_chat_passthrough: &String,
     sampling_parameters: &SamplingParameters,
-    is_metadata_supported: bool,
     meta: Option<ChatMeta>
 ) -> Result<EventSource, String> {
     let is_passthrough = prompt.starts_with("PASSTHROUGH ");
-    let url = if !is_passthrough { endpoint_template.replace("$MODEL", model_name) } else { endpoint_chat_passthrough.clone() };
-    save_url.clone_from(&&url);
     let mut headers = HeaderMap::new();
     headers.insert(CONTENT_TYPE, HeaderValue::from_str("application/json").unwrap());
-    if !bearer.is_empty() {
-        headers.insert(AUTHORIZATION, HeaderValue::from_str(format!("Bearer {}", bearer).as_str()).unwrap());
+    if !model_rec.api_key.is_empty() {
+        headers.insert(AUTHORIZATION, HeaderValue::from_str(&format!("Bearer {}", model_rec.api_key)).unwrap());
     }
-    if is_metadata_supported {
+    if model_rec.support_metadata {
         headers.insert(USER_AGENT, HeaderValue::from_str(format!("refact-lsp {}", crate::version::build_info::PKG_VERSION).as_str()).unwrap());
     }
     let mut data = json!({
-        "model": model_name,
+        "model": model_rec.name,
         "stream": true,
         "stream_options": {"include_usage": true},
     });
     if is_passthrough {
-        passthrough_messages_to_json(&mut data, prompt, model_name);
+        passthrough_messages_to_json(&mut data, prompt, &model_rec.name);
     } else {
         data["prompt"] = serde_json::Value::String(prompt.to_string());
     }
@@ -139,8 +128,8 @@ pub async fn forward_to_openai_style_endpoint_streaming(
         data["reasoning_effort"] = serde_json::Value::String(reasoning_effort.to_string());
     } else if let Some(thinking) = sampling_parameters.thinking.clone() {
         data["thinking"] = thinking.clone();
-    } else {
-        data["temperature"] = serde_json::Value::from(sampling_parameters.temperature);
+    } else if let Some(temperature) = sampling_parameters.temperature {
+        data["temperature"] = serde_json::Value::from(temperature);
     }
     data["max_completion_tokens"] = serde_json::Value::from(sampling_parameters.max_new_tokens);
 
@@ -151,11 +140,15 @@ pub async fn forward_to_openai_style_endpoint_streaming(
     if let Some(meta) = meta {
         data["meta"] = json!(meta);
     }
-    let builder = client.post(&url)
+
+    if model_rec.endpoint.is_empty() {
+        return Err(format!("No endpoint configured for {}", model_rec.id));
+    }
+    let builder = client.post(&model_rec.endpoint)
         .headers(headers)
         .body(data.to_string());
     let event_source: EventSource = EventSource::new(builder).map_err(|e|
-        format!("can't stream from {}: {}", url, e)
+        format!("can't stream from {}: {}", model_rec.endpoint, e)
     )?;
     Ok(event_source)
 }
@@ -207,31 +200,33 @@ struct EmbeddingsResultOpenAI {
     pub index: usize,
 }
 
+#[cfg(feature="vecdb")]
+#[derive(serde::Deserialize)]
+struct EmbeddingsResultOpenAINoIndex {
+    pub embedding: Vec<f32>,
+}
+
 #[cfg(feature="vecdb")]
 pub async fn get_embedding_openai_style(
     client: std::sync::Arc<AMutex<reqwest::Client>>,
     text: Vec<String>,
-    endpoint_template: &String,
-    model_name: &String,
-    api_key: &String,
+    model_rec: &EmbeddingModelRecord,
 ) -> Result<Vec<Vec<f32>>, String> {
-    if endpoint_template.is_empty() {
-        return Err(format!("no embedding_endpoint configured"));
+    if model_rec.base.endpoint.is_empty() {
+        return Err(format!("No embedding endpoint configured"));
     }
-    if api_key.is_empty() {
-        return Err(format!("cannot access embedding model, because api_key is empty"));
+    if model_rec.base.api_key.is_empty() {
+        return Err(format!("Cannot access embedding model, because api_key is empty"));
     }
     #[allow(non_snake_case)]
-    let B = text.len();
+    let B: usize = text.len();
     let payload = EmbeddingsPayloadOpenAI {
         input: text,
-        model: model_name.clone(),
+        model: model_rec.base.name.to_string(),
    };
-    let url = endpoint_template.clone();
-    let api_key_clone = api_key.clone();
     let response = client.lock().await
-        .post(&url)
-        .bearer_auth(api_key_clone.clone())
+        .post(&model_rec.base.endpoint)
+        .bearer_auth(&model_rec.base.api_key)
         .json(&payload)
         .send()
         .await
@@ -250,19 +245,36 @@ pub async fn get_embedding_openai_style(
 
     // info!("get_embedding_openai_style: {:?}", json);
     // {"data":[{"embedding":[0.0121664945...],"index":0,"object":"embedding"}, {}, {}]}
-    let unordered: Vec<EmbeddingsResultOpenAI> = match serde_json::from_value(json["data"].clone()) {
-        Ok(x) => x,
-        Err(err) => {
-            return Err(format!("get_embedding_openai_style: failed to parse unordered: {:?}", err));
-        }
-    };
+    // or {"data":[{"embedding":[0.0121664945...]}, {}, {}]} without index
+    let mut result: Vec<Vec<f32>> = vec![vec![]; B];
 
-    for ures in unordered.into_iter() {
-        let index = ures.index;
-        if index >= B {
-            return Err(format!("get_embedding_openai_style: index out of bounds: {:?}", json));
+    match serde_json::from_value::<Vec<EmbeddingsResultOpenAI>>(json["data"].clone()) {
+        Ok(unordered) => {
+            for ures in unordered.into_iter() {
+                let index = ures.index;
+                if index >= B {
+                    return Err(format!("get_embedding_openai_style: index out of bounds: {:?}", json));
+                }
+                result[index] = ures.embedding;
+            }
+        },
+        Err(_) => {
+            match serde_json::from_value::<Vec<EmbeddingsResultOpenAINoIndex>>(json["data"].clone()) {
+                Ok(ordered) => {
+                    if ordered.len() != B {
+                        return Err(format!("get_embedding_openai_style: response length mismatch: expected {}, got {}",
+                            B, ordered.len()));
+                    }
+                    for (i, res) in ordered.into_iter().enumerate() {
+                        result[i] = res.embedding;
+                    }
+                },
+                Err(err) => {
+                    tracing::info!("get_embedding_openai_style: failed to parse response: {:?}, {:?}", err, json);
+                    return Err(format!("get_embedding_openai_style: failed to parse response: {:?}", err));
+                }
+            }
         }
-        result[index] = ures.embedding;
     }
     Ok(result)
 }
diff --git a/refact-agent/engine/src/global_context.rs b/refact-agent/engine/src/global_context.rs
index cde382d69..e0be48c02 100644
--- a/refact-agent/engine/src/global_context.rs
+++ b/refact-agent/engine/src/global_context.rs
@@ -17,6 +17,7 @@ use tracing::{error, info};
 
 use crate::ast::ast_indexer_thread::AstIndexService;
 use crate::caps::CodeAssistantCaps;
+use crate::caps::providers::get_latest_provider_mtime;
 use crate::completion_cache::CompletionCache;
 use crate::custom_error::ScratchError;
 use crate::files_in_workspace::DocumentsState;
@@ -34,7 +35,7 @@ pub struct CommandLine {
     pub logs_stderr: bool,
     #[structopt(long, default_value="", help="Send logs to a file.")]
     pub logs_to_file: String,
-    #[structopt(long, short="u", default_value="", help="URL to start working. The first step is to fetch capabilities from $URL/refact-caps. You can supply your own caps in a local file, too, for the bring-your-own-key use case.")]
+    #[structopt(long, short="u", default_value="", help="URL to use: \"Refact\" for Refact Cloud, or your self-hosted server URL. To bring your own keys, use \"Refact\" and set up providers.")]
    pub address_url: String,
    #[structopt(long, short="k", default_value="", help="The API key to authenticate your requests, will appear in HTTP requests this binary makes.")]
    pub api_key: String,
@@ -82,7 +83,7 @@ pub struct CommandLine {
    #[structopt(long, short="w", default_value="", help="Workspace folder to find all the files. An LSP or HTTP request can override this later.")]
    pub workspace_folder: String,
-    #[structopt(long, help="create manually bring-your-own-key.yaml, customization.yaml and privacy.yaml and exit.")]
+    #[structopt(long, help="Create yaml configs such as customization.yaml and privacy.yaml, then exit.")]
    pub only_create_yaml_configs: bool,
    #[structopt(long, help="Print combined customization settings from both system defaults and customization.yaml.")]
    pub print_customization: bool,
@@ -151,11 +152,11 @@ pub struct GlobalContext {
    pub http_client_slowdown: Arc,
    pub cache_dir: PathBuf,
    pub config_dir: PathBuf,
-    pub caps: Option<Arc<StdRwLock<CodeAssistantCaps>>>,
+    pub caps: Option<Arc<CodeAssistantCaps>>,
    pub caps_reading_lock: Arc>,
    pub caps_last_error: String,
    pub caps_last_attempted_ts: u64,
-    pub tokenizer_map: HashMap<String, Arc<StdRwLock<Tokenizer>>>,
+    pub tokenizer_map: HashMap<String, Option<Arc<Tokenizer>>>,
    pub tokenizer_download_lock: Arc>,
    pub completions_cache: Arc>,
    pub telemetry: Arc>,
@@ -209,38 +210,39 @@ pub async fn migrate_to_config_folder(
pub async fn try_load_caps_quickly_if_not_present(
    gcx: Arc<ARwLock<GlobalContext>>,
    max_age_seconds: u64,
-) -> Result<Arc<StdRwLock<CodeAssistantCaps>>, ScratchError> {
+) -> Result<Arc<CodeAssistantCaps>, ScratchError> {
    let cmdline = CommandLine::from_args();   // XXX make it Arc and don't reload all the time
+    let (caps_reading_lock, config_dir) = {
+        let gcx_locked = gcx.read().await;
+        (gcx_locked.caps_reading_lock.clone(), gcx_locked.config_dir.clone())
+    };
-    let caps_reading_lock: Arc> = gcx.read().await.caps_reading_lock.clone();
    let now = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_secs();
    let caps_last_attempted_ts;
+    let latest_provider_mtime = get_latest_provider_mtime(&config_dir).await.unwrap_or(0);
    {
        // gcx is not locked, but a specialized async mutex is, up until caps are saved
        let _caps_reading_locked = caps_reading_lock.lock().await;
-        let caps_url = cmdline.address_url.clone();
-        if caps_url.to_lowercase() == "refact" || caps_url.starts_with("http") {
-            let max_age = if max_age_seconds > 0 { max_age_seconds } else { CAPS_BACKGROUND_RELOAD };
-            {
-                let mut cx_locked = gcx.write().await;
-                if cx_locked.caps_last_attempted_ts + max_age < now {
-                    cx_locked.caps = None;
-                    cx_locked.caps_last_attempted_ts = 0;
-                    caps_last_attempted_ts = 0;
-                } else {
-                    if let Some(caps_arc) = cx_locked.caps.clone() {
-                        return Ok(caps_arc.clone());
-                    }
-                    caps_last_attempted_ts = cx_locked.caps_last_attempted_ts;
+        let max_age = if max_age_seconds > 0 { max_age_seconds } else { CAPS_BACKGROUND_RELOAD };
+        {
+            let mut cx_locked = gcx.write().await;
+            if cx_locked.caps_last_attempted_ts + max_age < now || latest_provider_mtime >= cx_locked.caps_last_attempted_ts {
+                cx_locked.caps = None;
+                cx_locked.caps_last_attempted_ts = 0;
+                caps_last_attempted_ts = 0;
+            } else {
+                if let Some(caps_arc) = cx_locked.caps.clone() {
+                    return Ok(caps_arc.clone());
                }
-            }
-            if caps_last_attempted_ts + CAPS_RELOAD_BACKOFF > now {
-                let gcx_locked = gcx.write().await;
-                return Err(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, gcx_locked.caps_last_error.clone()));
+                caps_last_attempted_ts = cx_locked.caps_last_attempted_ts;
            }
        }
+        if caps_last_attempted_ts + CAPS_RELOAD_BACKOFF > now {
+            let gcx_locked = gcx.write().await;
+            return Err(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, gcx_locked.caps_last_error.clone()));
+        }
        let caps_result = crate::caps::load_caps(
            cmdline,
@@ -275,9 +277,8 @@ pub async fn look_for_piggyback_fields(
    let new_caps_version = dict.get("caps_version").and_then(|v| v.as_i64()).unwrap_or(0);
    if new_caps_version > 0 {
        if let Some(caps) = gcx_locked.caps.clone() {
-            let caps_locked = caps.read().unwrap();
-            if caps_locked.caps_version < new_caps_version {
-                info!("detected biggyback caps version {} is newer than the current version {}", new_caps_version, caps_locked.caps_version);
+            if caps.caps_version < new_caps_version {
+                info!("detected piggyback caps version {} is newer than the current version {}", new_caps_version, caps.caps_version);
                gcx_locked.caps = None;
                gcx_locked.caps_last_attempted_ts = 0;
            }
@@ -395,14 +396,4 @@ pub async fn create_global_context(
    let gcx = Arc::new(ARwLock::new(cx));
    crate::files_in_workspace::watcher_init(gcx.clone()).await;
    (gcx, ask_shutdown_receiver, cmdline)
-}
-
-pub async fn is_metadata_supported(gcx: Arc<ARwLock<GlobalContext>>) -> bool {
-    let gcx_locked = gcx.read().await;
-    if let Some(caps_arc) = gcx_locked.caps.clone() {
-        if let Ok(caps) = caps_arc.read() {
-            return caps.support_metadata;
-        }
-    }
-    false
-}
+}
\ No newline at end of file
diff --git a/refact-agent/engine/src/http/routers/v1.rs b/refact-agent/engine/src/http/routers/v1.rs
index 88c6055c3..ca8298e66 100644
--- a/refact-agent/engine/src/http/routers/v1.rs
+++ b/refact-agent/engine/src/http/routers/v1.rs
@@ -1,7 +1,5 @@
 use axum::Router;
-use axum::routing::get;
-use axum::routing::post;
-use axum::routing::delete;
+use axum::routing::{get, post, delete};
 use tower_http::cors::CorsLayer;
 use crate::http::utils::telemetry_middleware;
@@ -31,7 +29,11 @@ use crate::http::routers::v1::gui_help_handlers::handle_v1_fullpath;
 use crate::http::routers::v1::subchat::{handle_v1_subchat, handle_v1_subchat_single};
 use crate::http::routers::v1::sync_files::handle_v1_sync_files_extract_tar;
 use crate::http::routers::v1::system_prompt::handle_v1_prepend_system_prompt_and_maybe_more_initial_messages;
+use crate::http::routers::v1::providers::{handle_v1_providers, handle_v1_provider_templates,
+    handle_v1_get_model, handle_v1_get_provider, handle_v1_models, handle_v1_post_model, handle_v1_post_provider,
+    handle_v1_delete_model, handle_v1_delete_provider, handle_v1_model_default, handle_v1_completion_model_families};
 #[cfg(feature = "vecdb")]
 use crate::http::routers::v1::vecdb::{handle_v1_vecdb_search, handle_v1_vecdb_status};
 #[cfg(feature="vecdb")]
@@ -66,6 +68,7 @@ pub mod sync_files;
 pub mod system_prompt;
 pub mod telemetry_chat;
 pub mod telemetry_network;
+pub mod providers;
 mod file_edit_tools;
 #[cfg(feature = "vecdb")]
@@ -140,6 +143,18 @@ pub fn make_v1_router() -> Router {
        .route("/links", post(handle_v1_links))
        .route("/file_edit_tool_dry_run", post(handle_v1_file_edit_tool_dry_run))
+
+        .route("/providers", get(handle_v1_providers))
+        .route("/provider-templates", get(handle_v1_provider_templates))
+        .route("/provider", get(handle_v1_get_provider))
+        .route("/provider", post(handle_v1_post_provider))
+        .route("/provider", delete(handle_v1_delete_provider))
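+        // Illustrative usage, assuming this router is mounted under /v1:
+        //   GET    /v1/provider?provider-name=openai   -> one provider, merged with its template
+        //   POST   /v1/provider   (ProviderDTO body)   -> writes ~/.config/refact/providers.d/<name>.yaml
+        //   DELETE /v1/provider?provider-name=openai   -> removes that file
+        .route("/models", get(handle_v1_models))
+        .route("/model", 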
get(handle_v1_get_model)) + .route("/model", post(handle_v1_post_model)) + .route("/model", delete(handle_v1_delete_model)) + .route("/model-defaults", get(handle_v1_model_default)) + .route("/completion-model-families", get(handle_v1_completion_model_families)) // experimental .route("/get-dashboard-plots", get(get_dashboard_plots)) diff --git a/refact-agent/engine/src/http/routers/v1/at_commands.rs b/refact-agent/engine/src/http/routers/v1/at_commands.rs index 22865f61a..5bc80df18 100644 --- a/refact-agent/engine/src/http/routers/v1/at_commands.rs +++ b/refact-agent/engine/src/http/routers/v1/at_commands.rs @@ -4,7 +4,6 @@ use hyper::{Body, Response, StatusCode}; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use serde_json::{json, Value}; use tokio::sync::RwLock as ARwLock; use tokio::sync::Mutex as AMutex; @@ -14,10 +13,11 @@ use tokenizers::Tokenizer; use tracing::info; use crate::at_commands::execute_at::run_at_commands_locally; -use crate::cached_tokenizers; +use crate::tokens; use crate::at_commands::at_commands::AtCommandsContext; use crate::at_commands::execute_at::{execute_at_commands_in_query, parse_words_from_line}; use crate::call_validation::{ChatMeta, PostprocessSettings, SubchatParameters}; +use crate::caps::resolve_chat_model; use crate::custom_error::ScratchError; use crate::global_context::try_load_caps_quickly_if_not_present; use crate::global_context::GlobalContext; @@ -49,6 +49,8 @@ struct CommandPreviewPost { #[serde(default)] model: String, #[serde(default)] + provider: String, + #[serde(default)] pub meta: ChatMeta, } @@ -129,7 +131,7 @@ pub async fn handle_v1_command_completion( .unwrap()) } -async fn count_tokens(tokenizer_arc: Arc>, messages: &Vec) -> Result { +async fn count_tokens(tokenizer_arc: Option>, messages: &Vec) -> Result { let mut accum: u64 = 0; for message in messages { @@ -169,38 +171,25 @@ pub async fn handle_v1_command_preview( }; let caps = crate::global_context::try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; - let (model_name, recommended_model_record) = { - let caps_locked = caps.read().unwrap(); - let tmp = crate::caps::which_model_to_use( - &caps_locked.code_chat_models, - &post.model, - &caps_locked.code_chat_default_model, - ); - match tmp { - Ok(x) => (x.0, x.1.clone()), - Err(e) => { - tracing::warn!("can't find model: {}", e); - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("can't find model: {}", e)))?; - } - } - }; - let tokenizer_arc: Arc> = match cached_tokenizers::cached_tokenizer(caps.clone(), global_context.clone(), model_name.clone()).await { + let model_rec = resolve_chat_model(caps, &post.model) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + let tokenizer_arc = match tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await { Ok(x) => x, Err(e) => { - tracing::warn!("can't load tokenizer for preview: {}", e); - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("can't load tokenizer for preview: {}", e)))?; + tracing::error!(e); + return Err(ScratchError::new(StatusCode::BAD_REQUEST, e)); } }; - let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( + let ccx = Arc::new(AMutex::new(AtCommandsContext::new( global_context.clone(), - recommended_model_record.n_ctx, + model_rec.base.n_ctx, crate::http::routers::v1::chat::CHAT_TOP_N, true, vec![], "".to_string(), false, - model_name.clone(), + model_rec.base.id.clone(), ).await)); let 
(messages_for_postprocessing, vec_highlights) = execute_at_commands_in_query( @@ -208,7 +197,7 @@ pub async fn handle_v1_command_preview( &mut query ).await; - let rag_n_ctx = max_tokens_for_rag_chat(recommended_model_record.n_ctx, 512); // real maxgen may be different -- comes from request + let rag_n_ctx = max_tokens_for_rag_chat(model_rec.base.n_ctx, 512); // real maxgen may be different -- comes from request let mut preview: Vec = vec![]; for exec_result in messages_for_postprocessing.iter() { @@ -277,8 +266,8 @@ pub async fn handle_v1_command_preview( Ok(Response::builder() .status(StatusCode::OK) .body(Body::from(serde_json::to_string_pretty( - &json!({"messages": preview, "model": model_name, "highlight": highlights, - "current_context": tokens_number, "number_context": recommended_model_record.n_ctx}) + &json!({"messages": preview, "model": model_rec.base.id, "highlight": highlights, + "current_context": tokens_number, "number_context": model_rec.base.n_ctx}) ).unwrap())) .unwrap()) } @@ -291,8 +280,11 @@ pub async fn handle_v1_at_command_execute( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?; let caps = try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; - let tokenizer = cached_tokenizers::cached_tokenizer(caps, global_context.clone(), post.model_name.clone()).await - .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error loading tokenizer: {}", e)))?; + let model_rec = resolve_chat_model(caps, &post.model_name) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + + let tokenizer = tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; let mut ccx = AtCommandsContext::new( global_context.clone(), @@ -302,7 +294,7 @@ pub async fn handle_v1_at_command_execute( vec![], "".to_string(), false, - post.model_name.clone(), + model_rec.base.id.clone(), ).await; ccx.subchat_tool_parameters = post.subchat_tool_parameters.clone(); ccx.postprocess_parameters = post.postprocess_parameters.clone(); diff --git a/refact-agent/engine/src/http/routers/v1/at_tools.rs b/refact-agent/engine/src/http/routers/v1/at_tools.rs index 168b297f2..7dd3050ad 100644 --- a/refact-agent/engine/src/http/routers/v1/at_tools.rs +++ b/refact-agent/engine/src/http/routers/v1/at_tools.rs @@ -9,8 +9,8 @@ use serde_json::Value; use tokio::sync::{Mutex as AMutex, RwLock as ARwLock}; use crate::at_commands::at_commands::AtCommandsContext; -use crate::cached_tokenizers; use crate::call_validation::{ChatMessage, ChatMeta, ChatToolCall, PostprocessSettings, SubchatParameters}; +use crate::caps::resolve_chat_model; use crate::http::http_post_json; use crate::http::routers::v1::chat::CHAT_TOP_N; use crate::integrations::docker::docker_container_manager::docker_container_get_host_lsp_port_to_connect; @@ -226,8 +226,10 @@ pub async fn handle_v1_tools_execute( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?; let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let tokenizer = cached_tokenizers::cached_tokenizer(caps, gcx.clone(), tools_execute_post.model_name.clone()).await - .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error loading tokenizer: {}", e)))?; + let model_rec = resolve_chat_model(caps, &tools_execute_post.model_name) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + let 
tokenizer = crate::tokens::cached_tokenizer(gcx.clone(), &model_rec.base).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; let mut ccx = AtCommandsContext::new( gcx.clone(), @@ -237,7 +239,7 @@ pub async fn handle_v1_tools_execute( tools_execute_post.messages.clone(), tools_execute_post.chat_id.clone(), false, - tools_execute_post.model_name.clone(), + model_rec.base.id.clone(), ).await; ccx.subchat_tool_parameters = tools_execute_post.subchat_tool_parameters.clone(); ccx.postprocess_parameters = tools_execute_post.postprocess_parameters.clone(); @@ -246,7 +248,7 @@ pub async fn handle_v1_tools_execute( let mut at_tools = tools_merged_and_filtered(gcx.clone(), false).await.map_err(|e|{ ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error getting at_tools: {}", e)) })?; - let (messages, tools_ran) = run_tools( // todo: fix typo "runned" + let (messages, tools_ran) = run_tools( ccx_arc.clone(), &mut at_tools, tokenizer.clone(), tools_execute_post.maxgen, &tools_execute_post.messages, &tools_execute_post.style ).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error running tools: {}", e)))?; diff --git a/refact-agent/engine/src/http/routers/v1/caps.rs b/refact-agent/engine/src/http/routers/v1/caps.rs index b3d46b4f9..2c84a3af9 100644 --- a/refact-agent/engine/src/http/routers/v1/caps.rs +++ b/refact-agent/engine/src/http/routers/v1/caps.rs @@ -33,8 +33,7 @@ pub async fn handle_v1_caps( )); } }; - let caps_locked = caps_arc.read().unwrap(); - let body = serde_json::to_string_pretty(&*caps_locked).unwrap(); + let body = serde_json::to_string_pretty(&*caps_arc).unwrap(); let response = Response::builder() .header("Content-Type", "application/json") .body(Body::from(body)) diff --git a/refact-agent/engine/src/http/routers/v1/chat.rs b/refact-agent/engine/src/http/routers/v1/chat.rs index 217351e8c..db61cfbaa 100644 --- a/refact-agent/engine/src/http/routers/v1/chat.rs +++ b/refact-agent/engine/src/http/routers/v1/chat.rs @@ -1,5 +1,4 @@ use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use tokio::sync::Mutex as AMutex; use tokio::sync::RwLock as ARwLock; @@ -9,11 +8,11 @@ use hyper::{Body, Response, StatusCode}; use serde_json::Value; use crate::call_validation::{ChatContent, ChatMessage, ChatPost, ChatMode}; -use crate::caps::CodeAssistantCaps; +use crate::caps::resolve_chat_model; use crate::custom_error::ScratchError; use crate::at_commands::at_commands::AtCommandsContext; use crate::git::checkpoints::create_workspace_checkpoint; -use crate::global_context::{is_metadata_supported, GlobalContext, SharedGlobalContext}; +use crate::global_context::{GlobalContext, SharedGlobalContext}; use crate::integrations::docker::docker_container_manager::docker_container_check_status_or_start; @@ -56,33 +55,6 @@ pub fn available_tools_by_chat_mode(current_tools: Vec, chat_mode: &ChatM pub const CHAT_TOP_N: usize = 12; -pub async fn lookup_chat_scratchpad( - caps: Arc>, - chat_post: &ChatPost, -) -> Result<(String, String, serde_json::Value, usize, bool, bool, bool), String> { - let caps_locked = caps.read().unwrap(); - let (model_name, recommended_model_record) = - crate::caps::which_model_to_use( - &caps_locked.code_chat_models, - &chat_post.model, - &caps_locked.code_chat_default_model, - )?; - let (sname, patch) = crate::caps::which_scratchpad_to_use( - &recommended_model_record.supports_scratchpads, - &chat_post.scratchpad, - &recommended_model_record.default_scratchpad, - )?; - Ok(( - model_name, - sname.clone(), - 
patch.clone(), - recommended_model_record.n_ctx, - recommended_model_record.supports_tools, - recommended_model_record.supports_multimodality, - recommended_model_record.supports_clicks, - )) -} - pub async fn handle_v1_chat_completions( // standard openai-style handler Extension(gcx): Extension, @@ -110,7 +82,7 @@ pub fn deserialize_messages_from_post(messages: &Vec) -> Resu Ok(messages) } -fn fill_sampling_params(chat_post: &mut ChatPost, n_ctx: usize, model_name: &String) { +fn fill_sampling_params(chat_post: &mut ChatPost, n_ctx: usize, model_id: &str) { let mut max_tokens = if chat_post.increase_max_tokens { chat_post.max_tokens.unwrap_or(16384) } else { @@ -121,7 +93,7 @@ fn fill_sampling_params(chat_post: &mut ChatPost, n_ctx: usize, model_name: &Str if chat_post.parameters.max_new_tokens == 0 { chat_post.parameters.max_new_tokens = max_tokens; } - chat_post.model = model_name.clone(); + chat_post.model = model_id.to_string(); chat_post.parameters.n = chat_post.n; chat_post.parameters.temperature = Some(chat_post.parameters.temperature.unwrap_or(chat_post.temperature.unwrap_or(0.0))); } @@ -163,21 +135,17 @@ async fn _chat( } let caps = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let (model_name, scratchpad_name, scratchpad_patch, n_ctx, supports_tools, supports_multimodality, supports_clicks) = lookup_chat_scratchpad( - caps.clone(), - &chat_post, - ).await.map_err(|e| { - ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", e)) - })?; - fill_sampling_params(&mut chat_post, n_ctx, &model_name); + let model_rec = resolve_chat_model(caps, &chat_post.model) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?; + fill_sampling_params(&mut chat_post, model_rec.base.n_ctx, &model_rec.base.id); // extra validation to catch {"query": "Frog", "scope": "workspace"}{"query": "Toad", "scope": "workspace"} let re = regex::Regex::new(r"\{.*?\}").unwrap(); for message in messages.iter_mut() { - if !supports_multimodality { + if !model_rec.supports_multimodality { if let ChatContent::Multimodal(content) = &message.content { if content.iter().any(|el| el.is_image()) { - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("model '{}' does not support multimodality", model_name))); + return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("model '{}' does not support multimodality", model_rec.base.id))); } } message.content = ChatContent::SimpleText(message.content.content_text_only()); @@ -221,12 +189,10 @@ async fn _chat( .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; } - let meta = { - if is_metadata_supported(gcx.clone()).await { - Some(chat_post.meta.clone()) - } else { - None - } + let meta = if model_rec.base.support_metadata { + Some(chat_post.meta.clone()) + } else { + None }; if chat_post.checkpoints_enabled { @@ -253,16 +219,11 @@ async fn _chat( // chat_post.stream = Some(false); // for debugging 400 errors that are hard to debug with streaming (because "data: " is not present and the error message is ignored by the library) let mut scratchpad = crate::scratchpads::create_chat_scratchpad( gcx.clone(), - caps, - model_name.clone(), &mut chat_post, &messages, true, - &scratchpad_name, - &scratchpad_patch, + &model_rec, allow_at, - supports_tools, - supports_clicks, ).await.map_err(|e| ScratchError::new(StatusCode::BAD_REQUEST, e) )?; @@ -281,13 +242,13 @@ async fn _chat( // } let mut ccx = AtCommandsContext::new( gcx.clone(), - n_ctx, + model_rec.base.n_ctx, CHAT_TOP_N, 
false, messages.clone(), chat_post.meta.chat_id.clone(), should_execute_remotely, - model_name.clone(), + model_rec.base.id.clone(), ).await; ccx.subchat_tool_parameters = chat_post.subchat_tool_parameters.clone(); ccx.postprocess_parameters = chat_post.postprocess_parameters.clone(); @@ -298,7 +259,7 @@ async fn _chat( ccx_arc.clone(), &mut scratchpad, "chat".to_string(), - model_name, + &model_rec.base, &mut chat_post.parameters, chat_post.only_deterministic_messages, meta @@ -308,7 +269,7 @@ async fn _chat( ccx_arc.clone(), scratchpad, "chat-stream".to_string(), - model_name, + model_rec.base.clone(), chat_post.parameters.clone(), chat_post.only_deterministic_messages, meta diff --git a/refact-agent/engine/src/http/routers/v1/code_completion.rs b/refact-agent/engine/src/http/routers/v1/code_completion.rs index 281fabe51..af6aace0f 100644 --- a/refact-agent/engine/src/http/routers/v1/code_completion.rs +++ b/refact-agent/engine/src/http/routers/v1/code_completion.rs @@ -1,5 +1,4 @@ use std::sync::Arc; -use std::sync::RwLock as StdRwLock; use tokio::sync::RwLock as ARwLock; use tokio::sync::Mutex as AMutex; @@ -8,8 +7,7 @@ use axum::response::Result; use hyper::{Body, Response, StatusCode}; use tracing::info; use crate::call_validation::{CodeCompletionPost, code_completion_post_validate}; -use crate::caps; -use crate::caps::CodeAssistantCaps; +use crate::caps::resolve_completion_model; use crate::completion_cache; use crate::custom_error::ScratchError; use crate::global_context::GlobalContext; @@ -21,41 +19,6 @@ use crate::at_commands::at_commands::AtCommandsContext; const CODE_COMPLETION_TOP_N: usize = 5; -async fn _lookup_code_completion_scratchpad( - caps: Arc>, - code_completion_post: &CodeCompletionPost, - look_for_multiline_model: bool, -) -> Result<(String, String, serde_json::Value, usize), String> { - let caps_locked = caps.read().unwrap(); - - let (model_name, modelrec) = if !look_for_multiline_model - || caps_locked.multiline_code_completion_default_model.is_empty() { - caps::which_model_to_use( - &caps_locked.code_completion_models, - &code_completion_post.model, - &caps_locked.code_completion_default_model, - )? - } else { - caps::which_model_to_use( - &caps_locked.code_completion_models, - &code_completion_post.model, - &caps_locked.multiline_code_completion_default_model, - )? - }; - let (sname, patch) = caps::which_scratchpad_to_use( - &modelrec.supports_scratchpads, - &code_completion_post.scratchpad, - &modelrec.default_scratchpad, - )?; - let caps_completion_n_ctx = caps_locked.code_completion_n_ctx; - let mut n_ctx = modelrec.n_ctx; - if caps_completion_n_ctx > 0 && n_ctx > caps_completion_n_ctx { - // the model might be capable of a bigger context, but server (i.e. 
admin) tells us to use smaller (for example because latency) - n_ctx = caps_completion_n_ctx; - } - Ok((model_name, sname.clone(), patch.clone(), n_ctx)) -} - pub async fn handle_v1_code_completion( gcx: Arc>, code_completion_post: &mut CodeCompletionPost, @@ -67,27 +30,15 @@ pub async fn handle_v1_code_completion( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e))?; let caps = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let maybe = _lookup_code_completion_scratchpad( - caps.clone(), - &code_completion_post, - code_completion_post.inputs.multiline - ).await; - if maybe.is_err() { - // On error, this will also invalidate caps each 10 seconds, allows to overcome empty caps situation - let _ = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 10).await; - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", maybe.unwrap_err()))) - } - let (model_name, scratchpad_name, scratchpad_patch, n_ctx) = maybe.unwrap(); + let model_rec = resolve_completion_model(caps, &code_completion_post.model, true) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?; if code_completion_post.parameters.max_new_tokens == 0 { code_completion_post.parameters.max_new_tokens = 50; } if code_completion_post.model == "" { - code_completion_post.model = model_name.clone(); - } - if code_completion_post.scratchpad == "" { - code_completion_post.scratchpad = scratchpad_name.clone(); + code_completion_post.model = model_rec.base.id.clone(); } - info!("chosen completion model: {}, scratchpad: {}", code_completion_post.model, code_completion_post.scratchpad); + info!("chosen completion model: {}, scratchpad: {}", code_completion_post.model, model_rec.scratchpad); code_completion_post.parameters.temperature = Some(code_completion_post.parameters.temperature.unwrap_or(0.2)); let (cache_arc, tele_storage) = { let gcx_locked = gcx.write().await; @@ -109,31 +60,26 @@ pub async fn handle_v1_code_completion( let ast_service_opt = gcx.read().await.ast_service.clone(); let mut scratchpad = scratchpads::create_code_completion_scratchpad( gcx.clone(), - caps, - model_name.clone(), + &model_rec, &code_completion_post.clone(), - &scratchpad_name, - &scratchpad_patch, cache_arc.clone(), tele_storage.clone(), ast_service_opt - ).await.map_err(|e| - ScratchError::new(StatusCode::BAD_REQUEST, e) - )?; - let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( + ).await.map_err(|e| ScratchError::new(StatusCode::BAD_REQUEST, e))?; + let ccx = Arc::new(AMutex::new(AtCommandsContext::new( gcx.clone(), - n_ctx, + model_rec.base.n_ctx, CODE_COMPLETION_TOP_N, true, vec![], "".to_string(), false, - code_completion_post.model.clone(), + model_rec.base.id.clone(), ).await)); if !code_completion_post.stream { - crate::restream::scratchpad_interaction_not_stream(ccx.clone(), &mut scratchpad, "completion".to_string(), model_name, &mut code_completion_post.parameters, false, None).await + crate::restream::scratchpad_interaction_not_stream(ccx.clone(), &mut scratchpad, "completion".to_string(), &model_rec.base, &mut code_completion_post.parameters, false, None).await } else { - crate::restream::scratchpad_interaction_stream(ccx.clone(), scratchpad, "completion-stream".to_string(), model_name, code_completion_post.parameters.clone(), false, None).await + crate::restream::scratchpad_interaction_stream(ccx.clone(), scratchpad, "completion-stream".to_string(), model_rec.base.clone(), code_completion_post.parameters.clone(), false, 
None).await } } @@ -161,11 +107,8 @@ pub async fn handle_v1_code_completion_prompt( .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e))?; let caps = crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; - let maybe = _lookup_code_completion_scratchpad(caps.clone(), &post, post.inputs.multiline).await; - if maybe.is_err() { - return Err(ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", maybe.unwrap_err()))) - } - let (model_name, scratchpad_name, scratchpad_patch, n_ctx) = maybe.unwrap(); + let model_rec = resolve_completion_model(caps, &post.model, true) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?; // don't need cache, but go along let (cache_arc, tele_storage) = { @@ -176,11 +119,8 @@ pub async fn handle_v1_code_completion_prompt( let ast_service_opt = gcx.read().await.ast_service.clone(); let mut scratchpad = scratchpads::create_code_completion_scratchpad( gcx.clone(), - caps, - model_name.clone(), + &model_rec, &post, - &scratchpad_name, - &scratchpad_patch, cache_arc.clone(), tele_storage.clone(), ast_service_opt @@ -188,15 +128,15 @@ pub async fn handle_v1_code_completion_prompt( ScratchError::new(StatusCode::BAD_REQUEST, e) )?; - let ccx: Arc> = Arc::new(AMutex::new(AtCommandsContext::new( + let ccx = Arc::new(AMutex::new(AtCommandsContext::new( gcx.clone(), - n_ctx, + model_rec.base.n_ctx, CODE_COMPLETION_TOP_N, true, vec![], "".to_string(), false, - model_name.clone(), + model_rec.base.id.clone(), ).await)); let prompt = scratchpad.prompt(ccx.clone(), &mut post.parameters).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Prompt: {}", e)) diff --git a/refact-agent/engine/src/http/routers/v1/customization.rs b/refact-agent/engine/src/http/routers/v1/customization.rs index 23951ce37..11b436f3e 100644 --- a/refact-agent/engine/src/http/routers/v1/customization.rs +++ b/refact-agent/engine/src/http/routers/v1/customization.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use tokio::sync::RwLock as ARwLock; use crate::global_context::GlobalContext; -use crate::custom_error::ScratchError; +use crate::custom_error::{ScratchError, YamlError}; use crate::yaml_configs::customization_loader::load_customization; @@ -24,7 +24,7 @@ pub async fn handle_v1_customization( Extension(global_context): Extension>>, _body_bytes: hyper::body::Bytes, ) -> Result, ScratchError> { - let mut error_log: Vec = Vec::new(); + let mut error_log: Vec = Vec::new(); let tconfig = load_customization(global_context.clone(), false, &mut error_log).await; let mut response_body = serde_json::to_value(tconfig).unwrap(); diff --git a/refact-agent/engine/src/http/routers/v1/dashboard.rs b/refact-agent/engine/src/http/routers/v1/dashboard.rs index 96b9c08ba..526e28502 100644 --- a/refact-agent/engine/src/http/routers/v1/dashboard.rs +++ b/refact-agent/engine/src/http/routers/v1/dashboard.rs @@ -67,7 +67,7 @@ pub async fn get_dashboard_plots( let caps = crate::global_context::try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; let (http_client, api_key, url) = { let gcx_locked = global_context.read().await; - (gcx_locked.http_client.clone(), gcx_locked.cmdline.api_key.clone(), caps.read().unwrap().telemetry_basic_retrieve_my_own.clone()) + (gcx_locked.http_client.clone(), gcx_locked.cmdline.api_key.clone(), caps.telemetry_basic_retrieve_my_own.clone()) }; if url.is_empty() { return Err(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, "Error: no url provided from caps".to_string())); diff 
--git a/refact-agent/engine/src/http/routers/v1/links.rs b/refact-agent/engine/src/http/routers/v1/links.rs index be7c6f0e7..8ed788502 100644 --- a/refact-agent/engine/src/http/routers/v1/links.rs +++ b/refact-agent/engine/src/http/routers/v1/links.rs @@ -6,17 +6,14 @@ use serde::{Deserialize, Serialize}; use tokio::sync::RwLock as ARwLock; use crate::call_validation::{ChatMessage, ChatMeta, ChatMode}; +use crate::caps::resolve_chat_model; use crate::custom_error::ScratchError; -use crate::global_context::GlobalContext; +use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext}; use crate::integrations::go_to_configuration_message; use crate::agentic::generate_follow_up_message::generate_follow_up_message; use crate::git::commit_info::{get_commit_information_from_current_changes, generate_commit_messages}; // use crate::http::routers::v1::git::GitCommitPost; - -// TODO: remove this dirty hack when we add light_chat_model in caps -const LIGHT_MODEL_NAME: &str = "gpt-4o-mini"; - #[derive(Deserialize, Clone, Debug)] pub struct LinksPost { messages: Vec, @@ -225,8 +222,8 @@ pub async fn handle_v1_links( for e in integration_yaml_errors { links.push(Link { link_action: LinkAction::Goto, - link_text: format!("Syntax error in {}", crate::nicer_logs::last_n_chars(&e.integr_config_path, 20)), - link_goto: Some(format!("SETTINGS:{}", e.integr_config_path)), + link_text: format!("Syntax error in {}", crate::nicer_logs::last_n_chars(&e.path, 20)), + link_goto: Some(format!("SETTINGS:{}", e.path)), link_summary_path: None, link_tooltip: format!("Error at line {}: {}", e.error_line, e.error_msg), ..Default::default() @@ -347,8 +344,13 @@ pub async fn handle_v1_links( && post.messages.len() > 2 && post.messages.last().map(|x| x.role == "assistant").unwrap_or(false) { + let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await?; + let model_id = match resolve_chat_model(caps.clone(), &caps.defaults.chat_light_model) { + Ok(light_model) => light_model.base.id.clone(), + Err(_) => post.model_name.clone(), + }; let follow_up_response = generate_follow_up_message( - post.messages.clone(), gcx.clone(), LIGHT_MODEL_NAME.to_string(), &post.model_name, &post.meta.chat_id + post.messages.clone(), gcx.clone(), &model_id, &post.meta.chat_id ).await .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error generating follow-up message: {}", e)))?; new_chat_suggestion = follow_up_response.topic_changed; diff --git a/refact-agent/engine/src/http/routers/v1/providers.rs b/refact-agent/engine/src/http/routers/v1/providers.rs new file mode 100644 index 000000000..a8741aa6f --- /dev/null +++ b/refact-agent/engine/src/http/routers/v1/providers.rs @@ -0,0 +1,715 @@ +use axum::extract::Query; +use axum::Extension; +use axum::http::{Response, StatusCode}; +use hyper::Body; +use std::path::Path; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use std::sync::Arc; +use tokio::sync::RwLock as ARwLock; + +use crate::call_validation::ModelType; +use crate::caps::{ChatModelRecord, CompletionModelFamily, CompletionModelRecord, EmbeddingModelRecord, HasBaseModelRecord}; +use crate::custom_error::{MapErrToString, ScratchError}; +use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext}; +use crate::caps::providers::{get_known_models, get_provider_from_server, get_provider_from_template_and_config_file, get_provider_model_default_settings_ui, get_provider_templates, read_providers_d, CapsProvider}; + +#[derive(Serialize, Deserialize, Debug)] 
+pub struct ProviderDTO {
+    name: String,
+    endpoint_style: String,
+    chat_endpoint: String,
+    completion_endpoint: String,
+    embedding_endpoint: String,
+    api_key: String,
+    #[serde(default)]
+    tokenizer_api_key: String,
+
+    chat_default_model: String,
+    chat_thinking_model: String,
+    chat_light_model: String,
+
+    enabled: bool,
+    #[serde(default)]
+    readonly: bool,
+    #[serde(default = "default_true")]
+    supports_completion: bool,
+}
+
+fn default_true() -> bool { true }
+
+impl ProviderDTO {
+    pub fn from_caps_provider(provider: CapsProvider, readonly: bool) -> Self {
+        ProviderDTO {
+            name: provider.name,
+            endpoint_style: provider.endpoint_style,
+            chat_endpoint: provider.chat_endpoint,
+            completion_endpoint: if provider.supports_completion { provider.completion_endpoint } else { String::new() },
+            embedding_endpoint: provider.embedding_endpoint,
+            api_key: provider.api_key,
+            tokenizer_api_key: provider.tokenizer_api_key,
+            chat_default_model: provider.defaults.chat_default_model,
+            chat_light_model: provider.defaults.chat_light_model,
+            chat_thinking_model: provider.defaults.chat_thinking_model,
+            enabled: provider.enabled,
+            readonly,
+            supports_completion: provider.supports_completion,
+        }
+    }
+}
+
+#[derive(Serialize, Debug)]
+pub struct ModelLightResponse {
+    name: String,
+    enabled: bool,
+    removable: bool,
+    user_configured: bool,
+}
+
+impl ModelLightResponse {
+    pub fn new<T: HasBaseModelRecord>(model: T) -> Self {
+        ModelLightResponse {
+            name: model.base().name.clone(),
+            enabled: model.base().enabled,
+            removable: model.base().removable,
+            user_configured: model.base().user_configured,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ChatModelDTO {
+    n_ctx: usize,
+    name: String,
+    tokenizer: String,
+    enabled: bool,
+
+    supports_tools: bool,
+    supports_multimodality: bool,
+    supports_clicks: bool,
+    supports_agent: bool,
+    supports_reasoning: Option<String>,
+    supports_boost_reasoning: bool,
+    default_temperature: Option<f32>,
+
+    #[serde(skip_deserializing, rename = "type", default = "model_type_chat")]
+    model_type: ModelType,
+}
+
+fn model_type_chat() -> ModelType { ModelType::Chat }
+
+impl ChatModelDTO {
+    pub fn new(chat_model: ChatModelRecord) -> Self {
+        ChatModelDTO {
+            n_ctx: chat_model.base.n_ctx,
+            name: chat_model.base.name,
+            tokenizer: chat_model.base.tokenizer,
+            enabled: chat_model.base.enabled,
+            supports_tools: chat_model.supports_tools,
+            supports_multimodality: chat_model.supports_multimodality,
+            supports_clicks: chat_model.supports_clicks,
+            supports_agent: chat_model.supports_agent,
+            supports_reasoning: chat_model.supports_reasoning,
+            supports_boost_reasoning: chat_model.supports_boost_reasoning,
+            default_temperature: chat_model.default_temperature,
+            model_type: ModelType::Chat,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct CompletionModelDTO {
+    n_ctx: usize,
+    name: String,
+    enabled: bool,
+    model_family: Option<CompletionModelFamily>,
+    #[serde(skip_deserializing, rename = "type", default = "model_type_completion")]
+    model_type: ModelType,
+}
+
+fn model_type_completion() -> ModelType { ModelType::Completion }
+
+impl CompletionModelDTO {
+    pub fn new(completion_model: CompletionModelRecord) -> Self {
+        CompletionModelDTO {
+            n_ctx: completion_model.base.n_ctx,
+            name: completion_model.base.name,
+            enabled: completion_model.base.enabled,
+            model_family: completion_model.model_family,
+            model_type: ModelType::Completion,
+        }
+    }
+}
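+
+// Sketch of the providers.d YAML fragment this DTO round-trips through handle_v1_post_model
+// below; the keys are exactly the ones written there:
+//   embedding_model: {n_ctx, name, tokenizer, enabled, embedding_size, rejection_threshold, embedding_batch}
+#[derive(Serialize, Deserialize, Debug)]
+pub struct EmbeddingModelDTO {
+    n_ctx: usize,
+    name: String,
+    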
tokenizer: String, + enabled: bool, + + embedding_size: i32, + rejection_threshold: f32, + embedding_batch: usize, + + #[serde(skip_deserializing, rename = "type", default = "model_type_embedding")] + model_type: ModelType, +} + +fn model_type_embedding() -> ModelType { ModelType::Embedding } + +impl EmbeddingModelDTO { + pub fn new(embedding_model: EmbeddingModelRecord) -> Self { + EmbeddingModelDTO { + n_ctx: embedding_model.base.n_ctx, + name: embedding_model.base.name, + tokenizer: embedding_model.base.tokenizer, + enabled: embedding_model.base.enabled, + embedding_size: embedding_model.embedding_size, + rejection_threshold: embedding_model.rejection_threshold, + embedding_batch: embedding_model.embedding_batch, + model_type: ModelType::Embedding, + } + } +} + +pub async fn handle_v1_providers( + Extension(gcx): Extension>>, +) -> Response { + let config_dir = { + let gcx_locked = gcx.read().await; + gcx_locked.config_dir.clone() + }; + + let template_names = get_provider_templates().keys().collect::>(); + let (providers, read_errors) = read_providers_d(Vec::new(), &config_dir).await; + + let mut result = providers.into_iter() + .filter(|p| template_names.contains(&&p.name)) + .map(|p| json!({ + "name": p.name, + "enabled": p.enabled, + "readonly": false, + "supports_completion": p.supports_completion + })) + .collect::>(); + + match crate::global_context::try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => { + if !caps.cloud_name.is_empty() { + result.retain(|p| p["name"] != caps.cloud_name); + result.insert(0, json!({ + "name": caps.cloud_name.clone(), + "enabled": true, + "readonly": true, + "supports_completion": true + })); + } + }, + Err(e) => { + tracing::error!("Failed to load caps, server provider will not be included: {}", e); + } + } + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&json!({ + "providers": result, + "error_log": read_errors + })).unwrap())) + .unwrap() +} + +pub async fn handle_v1_provider_templates() -> Response { + let provider_templates = get_provider_templates(); + + let result = provider_templates.keys().map(|name| { json!({ + "name": name + })}).collect::>(); + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&json!({ + "provider_templates": result + })).unwrap())) + .unwrap() +} + +#[derive(Deserialize)] +pub struct ProviderQueryParams { + #[serde(rename = "provider-name")] + provider_name: String, +} + +pub async fn handle_v1_get_provider( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider_name, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + let provider_dto = if use_server_provider { + let provider = get_provider_from_server(gcx.clone()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + ProviderDTO::from_caps_provider(provider, true) + } else { + let config_dir = gcx.read().await.config_dir.clone(); + let provider = get_provider_from_template_and_config_file(&config_dir, ¶ms.provider_name, false, true).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; + ProviderDTO::from_caps_provider(provider, false) + }; + + Ok(Response::builder() + 
.status(StatusCode::OK)
+        .header("Content-Type", "application/json")
+        .body(Body::from(serde_json::to_string(&provider_dto).unwrap()))
+        .unwrap())
+}
+
+pub async fn handle_v1_post_provider(
+    Extension(gcx): Extension<Arc<ARwLock<GlobalContext>>>,
+    body_bytes: hyper::body::Bytes,
+) -> Result<Response<Body>, ScratchError> {
+    let provider_dto = serde_json::from_slice::<ProviderDTO>(&body_bytes)
+        .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing provider: {}", e)))?;
+
+    let config_dir = gcx.read().await.config_dir.clone();
+    let provider_path = config_dir.join("providers.d").join(format!("{}.yaml", provider_dto.name));
+
+    let provider_template = get_provider_templates().get(&provider_dto.name).cloned()
+        .ok_or(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, "Provider template not found".to_string()))?;
+
+    let mut file_value = read_yaml_file_as_value_if_exists(&provider_path).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    update_yaml_field_if_needed(&mut file_value, "endpoint_style",
+        provider_dto.endpoint_style, provider_template.endpoint_style);
+    update_yaml_field_if_needed(&mut file_value, "api_key",
+        provider_dto.api_key, provider_template.api_key);
+    update_yaml_field_if_needed(&mut file_value, "tokenizer_api_key",
+        provider_dto.tokenizer_api_key, provider_template.tokenizer_api_key);
+    update_yaml_field_if_needed(&mut file_value, "chat_endpoint",
+        provider_dto.chat_endpoint, provider_template.chat_endpoint);
+    update_yaml_field_if_needed(&mut file_value, "completion_endpoint",
+        provider_dto.completion_endpoint, provider_template.completion_endpoint);
+    update_yaml_field_if_needed(&mut file_value, "embedding_endpoint",
+        provider_dto.embedding_endpoint, provider_template.embedding_endpoint);
+    update_yaml_field_if_needed(&mut file_value, "chat_default_model",
+        provider_dto.chat_default_model, provider_template.defaults.chat_default_model);
+    update_yaml_field_if_needed(&mut file_value, "chat_light_model",
+        provider_dto.chat_light_model, provider_template.defaults.chat_light_model);
+    update_yaml_field_if_needed(&mut file_value, "chat_thinking_model",
+        provider_dto.chat_thinking_model, provider_template.defaults.chat_thinking_model);
+    file_value["enabled"] = serde_yaml::Value::Bool(provider_dto.enabled);
+
+    let file_content = serde_yaml::to_string(&file_value)
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error parsing provider file: {}", e)))?;
+    tokio::fs::write(&provider_path, file_content.as_bytes()).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error writing provider file: {}", e)))?;
+
+    Ok(Response::builder()
+        .status(StatusCode::OK)
+        .header("Content-Type", "application/json")
+        .body(Body::from(json!({ "success": true }).to_string()))
+        .unwrap())
+}
+
+/// Writes the field only if the user's file already sets it or the value differs from the
+/// template default, which keeps providers.d files minimal.
+fn update_yaml_field_if_needed(
+    file_value: &mut serde_yaml::Value,
+    field_name: &str,
+    dto_value: String,
+    template_value: String,
+) {
+    if file_value.get(field_name).is_some() || dto_value != template_value {
+        file_value[field_name] = serde_yaml::Value::String(dto_value);
+    }
+}
+
+async fn read_yaml_file_as_value_if_exists(path: &Path) -> Result<serde_yaml::Value, String> {
+    match tokio::fs::read_to_string(path).await {
+        Ok(content) => {
+            serde_yaml::from_str::<serde_yaml::Value>(&content)
+                .map_err_with_prefix("Error parsing file:")
+        },
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+            Ok(serde_yaml::Value::Mapping(serde_yaml::Mapping::new()))
+        },
+        Err(e) => {
+            Err(format!("Error reading file: {e}"))
+        }
+    }
+}
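+
+/// Deletes providers.d/<provider-name>.yaml so the provider reverts to its template defaults;
+/// the provider that comes from the server cannot be deleted.
+pub async fn 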
handle_v1_delete_provider( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider_name, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + if use_server_provider { + return Err(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, + "Cannot delete server provider".to_string())); + } + + let config_dir = gcx.read().await.config_dir.clone(); + + if !get_provider_templates().contains_key(¶ms.provider_name) { + return Err(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, + format!("Provider template '{}' not found", params.provider_name))); + } + + let provider_path = config_dir.join("providers.d") + .join(format!("{}.yaml", params.provider_name)); + + if !provider_path.exists() { + return Err(ScratchError::new(StatusCode::NOT_FOUND, + format!("Provider '{}' does not exist", params.provider_name))); + } + + tokio::fs::remove_file(&provider_path).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to delete provider file: {}", e)))?; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(json!({ "success": true }).to_string())) + .unwrap()) +} + +pub async fn handle_v1_models( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider_name, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + let provider = if use_server_provider { + get_provider_from_server(gcx.clone()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? + } else { + let config_dir = gcx.read().await.config_dir.clone(); + get_provider_from_template_and_config_file(&config_dir, ¶ms.provider_name, false, true).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? 
+ }; + + let result = serde_json::json!({ + "chat_models": provider.chat_models.into_iter() + .map(|(_, model)| ModelLightResponse::new(model)).collect::>(), + "completion_models": if provider.supports_completion { + provider.completion_models.into_iter() + .map(|(_, model)| ModelLightResponse::new(model)).collect::>() + } else { + Vec::::new() + }, + "embedding_model": ModelLightResponse::new(provider.embedding_model), + }); + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&result).unwrap())) + .unwrap()) +} + +#[derive(Deserialize)] +pub struct ModelQueryParams { + model: Option, + provider: String, + #[serde(rename = "type")] + model_type: ModelType, +} + +#[derive(Deserialize)] +pub struct ModelDefaultQueryParams { + provider: String, + #[serde(rename = "type")] + model_type: ModelType, +} + +pub async fn handle_v1_get_model( + Extension(gcx): Extension>>, + Query(params): Query, +) -> Result, ScratchError> { + let use_server_provider = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await { + Ok(caps) => !caps.cloud_name.is_empty() && caps.cloud_name == params.provider, + Err(e) => { + tracing::error!("Failed to load caps: {}", e); + false + } + }; + + let provider = if use_server_provider { + get_provider_from_server(gcx.clone()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? + } else { + let config_dir = gcx.read().await.config_dir.clone(); + get_provider_from_template_and_config_file(&config_dir, ¶ms.provider, false, true).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))? + }; + + let model = match params.model_type { + ModelType::Chat => { + let model_name = params.model.ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?; + let chat_model = provider.chat_models.get(&model_name).cloned() + .ok_or(ScratchError::new(StatusCode::NOT_FOUND, format!("Chat model {} not found for provider {}", model_name, params.provider)))?; + serde_json::json!(ChatModelDTO::new(chat_model)) + }, + ModelType::Completion => { + if !provider.supports_completion { + return Err(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Provider {} does not support completion", params.provider))); + } + let model_name = params.model.ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?; + let completion_model = provider.completion_models.get(&model_name).cloned() + .ok_or(ScratchError::new(StatusCode::NOT_FOUND, format!("Completion model {} not found for provider {}", model_name, params.provider)))?; + serde_json::json!(CompletionModelDTO::new(completion_model)) + }, + ModelType::Embedding => { + serde_json::json!(EmbeddingModelDTO::new(provider.embedding_model)) + }, + }; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&model).unwrap())) + .unwrap()) +} + +#[derive(Deserialize)] +pub struct ModelPOST { + pub provider: String, + pub model: serde_json::Value, + #[serde(rename = "type")] + pub model_type: ModelType, +} + +pub async fn handle_v1_post_model( + Extension(gcx): Extension>>, + body_bytes: hyper::body::Bytes, +) -> Result, ScratchError> { + let post = serde_json::from_slice::(&body_bytes) + .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing json: {}", e)))?; + + let config_dir = 
gcx.read().await.config_dir.clone();
+    let provider_path = config_dir.join("providers.d").join(format!("{}.yaml", post.provider));
+
+    let _provider_template = get_provider_templates().get(&post.provider)
+        .ok_or(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, "Provider template not found".to_string()))?;
+
+    let mut file_value = read_yaml_file_as_value_if_exists(&provider_path).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    fn get_or_create_model_mapping(file_value: &mut serde_yaml::Value, models_key: &str, model_name: &str) -> serde_yaml::Mapping {
+        if file_value.get(models_key).is_none() {
+            file_value[models_key] = serde_yaml::Value::Mapping(serde_yaml::Mapping::new());
+        }
+
+        let model_entry = if file_value[models_key].get(model_name).is_some() {
+            file_value[models_key][model_name].clone()
+        } else {
+            serde_yaml::Value::Mapping(serde_yaml::Mapping::new())
+        };
+
+        model_entry.as_mapping().cloned().unwrap_or_default()
+    }
+
+    match post.model_type {
+        ModelType::Chat => {
+            let chat_model = serde_json::from_value::<ChatModelDTO>(post.model)
+                .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing model: {}", e)))?;
+            let models_key = "chat_models";
+
+            let mut model_value = get_or_create_model_mapping(&mut file_value, models_key, &chat_model.name);
+
+            model_value.insert("n_ctx".into(), chat_model.n_ctx.into());
+            model_value.insert("tokenizer".into(), chat_model.tokenizer.into());
+            model_value.insert("enabled".into(), chat_model.enabled.into());
+
+            model_value.insert("supports_tools".into(), chat_model.supports_tools.into());
+            model_value.insert("supports_multimodality".into(), chat_model.supports_multimodality.into());
+            model_value.insert("supports_clicks".into(), chat_model.supports_clicks.into());
+            model_value.insert("supports_agent".into(), chat_model.supports_agent.into());
+            model_value.insert("supports_boost_reasoning".into(), chat_model.supports_boost_reasoning.into());
+
+            model_value.insert("supports_reasoning".into(),
+                match chat_model.supports_reasoning {
+                    Some(supports_reasoning) => supports_reasoning.into(),
+                    None => serde_yaml::Value::Null,
+                }
+            );
+            model_value.insert("default_temperature".into(),
+                match chat_model.default_temperature {
+                    Some(default_temperature) => serde_yaml::Value::Number(serde_yaml::Number::from(default_temperature as f64)),
+                    None => serde_yaml::Value::Null,
+                }
+            );
+
+            file_value[models_key][chat_model.name] = model_value.into();
+        },
+        ModelType::Completion => {
+            let completion_model = serde_json::from_value::<CompletionModelDTO>(post.model)
+                .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing model: {}", e)))?;
+            let models_key = "completion_models";
+
+            let mut model_value = get_or_create_model_mapping(&mut file_value, models_key, &completion_model.name);
+
+            if let Some(model_family) = completion_model.model_family {
+                let family_model_rec = get_known_models().completion_models.get(&model_family.to_string())
+                    .unwrap_or_else(|| panic!("Model family {} not found in known models", model_family));
+
+                model_value.insert("model_family".into(), model_family.to_string().into());
+                model_value.insert("scratchpad".into(), family_model_rec.scratchpad.clone().into());
+                model_value.insert("scratchpad_patch".into(), serde_yaml::from_str(&family_model_rec.scratchpad_patch.to_string()).unwrap());
+                model_value.insert("tokenizer".into(), family_model_rec.base.tokenizer.clone().into());
+            }
+
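+            // The completion entry ends up under completion_models.<name> with the family-derived
+            // keys above (model_family, scratchpad, scratchpad_patch, tokenizer) plus n_ctx and enabled.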
+            model_value.insert("n_ctx".into(), completion_model.n_ctx.into());
+            model_value.insert("enabled".into(), completion_model.enabled.into());
+
+            file_value[models_key][completion_model.name] = model_value.into();
+        },
+        ModelType::Embedding => {
+            let embedding_model = serde_json::from_value::<EmbeddingModelDTO>(post.model)
+                .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("Error parsing model: {}", e)))?;
+            let mut model_value = serde_yaml::Mapping::new();
+
+            model_value.insert("n_ctx".into(), embedding_model.n_ctx.into());
+            model_value.insert("name".into(), embedding_model.name.clone().into());
+            model_value.insert("tokenizer".into(), embedding_model.tokenizer.into());
+            model_value.insert("enabled".into(), embedding_model.enabled.into());
+
+            model_value.insert("embedding_size".into(), embedding_model.embedding_size.into());
+            model_value.insert("rejection_threshold".into(), serde_yaml::Value::Number(serde_yaml::Number::from(embedding_model.rejection_threshold as f64)));
+            model_value.insert("embedding_batch".into(), embedding_model.embedding_batch.into());
+
+            file_value["embedding_model"] = model_value.into();
+        },
+    }
+
+    let file_content = serde_yaml::to_string(&file_value)
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error parsing provider file: {}", e)))?;
+    tokio::fs::write(&provider_path, file_content.as_bytes()).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error writing provider file: {}", e)))?;
+
+    Ok(Response::builder()
+        .status(StatusCode::OK)
+        .header("Content-Type", "application/json")
+        .body(Body::from(json!({ "success": true }).to_string()))
+        .unwrap())
+}
+
+pub async fn handle_v1_delete_model(
+    Extension(gcx): Extension<Arc<ARwLock<GlobalContext>>>,
+    Query(params): Query<ModelQueryParams>,
+) -> Result<Response<Body>, ScratchError> {
+    let config_dir = gcx.read().await.config_dir.clone();
+    let provider_path = config_dir.join("providers.d").join(format!("{}.yaml", params.provider));
+
+    let _provider_template = get_provider_templates().get(&params.provider)
+        .ok_or(ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, "Provider template not found".to_string()))?;
+
+    let mut file_value = read_yaml_file_as_value_if_exists(&provider_path).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
+
+    match params.model_type {
+        ModelType::Chat => {
+            let model_name = params.model.as_ref()
+                .ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?;
+            let models_key = "chat_models";
+
+            if file_value.get(models_key).is_none() || file_value[models_key].get(model_name).is_none() {
+                return Err(ScratchError::new(StatusCode::NOT_FOUND,
+                    format!("Chat model {} not found for provider {}", model_name, params.provider)));
+            }
+
+            if let Some(mapping) = file_value[models_key].as_mapping_mut() {
+                mapping.remove(model_name);
+            }
+        },
+        ModelType::Completion => {
+            let model_name = params.model.as_ref()
+                .ok_or_else(|| ScratchError::new(StatusCode::BAD_REQUEST, "Missing `model` query parameter".to_string()))?;
+            let models_key = "completion_models";
+
+            if file_value.get(models_key).is_none() || file_value[models_key].get(model_name).is_none() {
+                return Err(ScratchError::new(StatusCode::NOT_FOUND,
+                    format!("Completion model {} not found for provider {}", model_name, params.provider)));
+            }
+
+            if let Some(mapping) = file_value[models_key].as_mapping_mut() {
+                mapping.remove(model_name);
+            }
+        },
+        ModelType::Embedding => {
+            if file_value.get("embedding_model").is_none() {
+                return 
Err(ScratchError::new(StatusCode::NOT_FOUND, + format!("Embedding model not found for provider {}", params.provider))); + } + + file_value.as_mapping_mut().unwrap().remove("embedding_model"); + }, + } + + let file_content = serde_yaml::to_string(&file_value) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error parsing provider file: {}", e)))?; + tokio::fs::write(&provider_path, file_content.as_bytes()).await + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error writing provider file: {}", e)))?; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(json!({ "success": true }).to_string())) + .unwrap()) +} + +pub async fn handle_v1_model_default( + Query(params): Query, +) -> Result, ScratchError> { + let model_defaults = get_provider_model_default_settings_ui().get(¶ms.provider).ok_or_else(|| + ScratchError::new(StatusCode::NOT_FOUND, "Provider not found".to_string()) + )?; + + let response_json = match params.model_type { + ModelType::Chat => serde_json::json!(ChatModelDTO::new(model_defaults.chat.clone())), + ModelType::Completion => serde_json::json!(CompletionModelDTO::new(model_defaults.completion.clone())), + ModelType::Embedding => serde_json::json!(EmbeddingModelDTO::new(model_defaults.embedding.clone())), + }; + + Ok(Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&response_json).unwrap())) + .unwrap()) +} + +pub async fn handle_v1_completion_model_families() -> Response { + let response_json = json!({ + "model_families": CompletionModelFamily::all_variants() + .into_iter().map(|family| family.to_string()).collect::>() + }); + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(serde_json::to_string(&response_json).unwrap())) + .unwrap() +} \ No newline at end of file diff --git a/refact-agent/engine/src/http/routers/v1/subchat.rs b/refact-agent/engine/src/http/routers/v1/subchat.rs index 9e402135c..282dc82f8 100644 --- a/refact-agent/engine/src/http/routers/v1/subchat.rs +++ b/refact-agent/engine/src/http/routers/v1/subchat.rs @@ -5,10 +5,11 @@ use axum::http::{Response, StatusCode}; use hyper::Body; use serde::Deserialize; use tokio::sync::RwLock as ARwLock; +use crate::caps::resolve_chat_model; use crate::subchat::{subchat, subchat_single}; use crate::at_commands::at_commands::AtCommandsContext; use crate::custom_error::ScratchError; -use crate::global_context::GlobalContext; +use crate::global_context::{try_load_caps_quickly_if_not_present, GlobalContext}; use crate::http::routers::v1::chat::deserialize_messages_from_post; @@ -29,6 +30,7 @@ pub async fn handle_v1_subchat( let post = serde_json::from_slice::(&body_bytes) .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?; let messages = deserialize_messages_from_post(&post.messages)?; + let caps = try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?; let top_n = 7; let fake_n_ctx = 4096; @@ -43,9 +45,11 @@ pub async fn handle_v1_subchat( post.model_name.clone(), ).await)); + let model = resolve_chat_model(caps, &post.model_name) + .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?; let new_messages = subchat( ccx.clone(), - post.model_name.as_str(), + &model.base.id, messages, post.tools_turn_on, post.wrap_up_depth, @@ -56,11 +60,11 @@ pub async fn handle_v1_subchat( None, None, 
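The save and delete handlers above share one read-modify-write cycle over a per-provider YAML file under `providers.d`. A minimal, self-contained sketch of that upsert pattern (the `upsert_chat_model` helper, the field subset, and the sample file content are illustrative, not the engine's actual API):

```rust
use serde_yaml::{Mapping, Value};

// Insert or update one entry under `chat_models`, creating the section if needed.
// Mirrors the handler above: editing the parsed Value instead of round-tripping
// through typed structs leaves keys the user wrote by hand intact.
fn upsert_chat_model(root: &mut Value, name: &str, n_ctx: u64, enabled: bool) {
    if root.get("chat_models").is_none() {
        root.as_mapping_mut()
            .expect("provider file must be a YAML mapping")
            .insert("chat_models".into(), Value::Mapping(Mapping::new()));
    }
    let mut entry = Mapping::new();
    entry.insert("n_ctx".into(), n_ctx.into());
    entry.insert("enabled".into(), enabled.into());
    root["chat_models"][name] = Value::Mapping(entry);
}

fn main() {
    // Hypothetical pre-existing provider file content:
    let mut root: Value = serde_yaml::from_str("api_key: $OPENAI_API_KEY\n").unwrap();
    upsert_chat_model(&mut root, "gpt-4o-mini", 128000, true);
    println!("{}", serde_yaml::to_string(&root).unwrap());
    // api_key: $OPENAI_API_KEY
    // chat_models:
    //   gpt-4o-mini:
    //     n_ctx: 128000
    //     enabled: true
}
```

The same reasoning explains why the embedding branch writes a single top-level `embedding_model` mapping rather than a keyed section: a provider carries at most one embedding model, so deletion can simply remove that key.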
         None,
         None,
         None,
-        Some(false), 
+        Some(false),
     ).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error: {}", e)))?;
 
     let new_messages = new_messages.into_iter()
-        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None)).collect::<Vec<Value>>())
+        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None, &model.base.id)).collect::<Vec<Value>>())
         .collect::<Vec<Vec<Value>>>();
     let resp_serialised = serde_json::to_string_pretty(&new_messages).unwrap();
     Ok(
@@ -93,6 +97,7 @@ pub async fn handle_v1_subchat_single(
     let post = serde_json::from_slice::(&body_bytes)
         .map_err(|e| ScratchError::new(StatusCode::UNPROCESSABLE_ENTITY, format!("JSON problem: {}", e)))?;
     let messages = deserialize_messages_from_post(&post.messages)?;
+    let caps = try_load_caps_quickly_if_not_present(global_context.clone(), 0).await?;
 
     let top_n = 7;
     let fake_n_ctx = 4096;
@@ -107,9 +112,11 @@ pub async fn handle_v1_subchat_single(
         post.model_name.clone(),
     ).await));
 
+    let model = resolve_chat_model(caps, &post.model_name)
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e))?;
     let new_messages = subchat_single(
         ccx.clone(),
-        post.model_name.as_str(),
+        &model.base.id,
         messages,
         Some(post.tools_turn_on),
         post.tool_choice,
@@ -125,7 +132,7 @@
     ).await.map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error: {}", e)))?;
 
     let new_messages = new_messages.into_iter()
-        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None)).collect::<Vec<Value>>())
+        .map(|msgs|msgs.iter().map(|msg|msg.into_value(&None, &model.base.id)).collect::<Vec<Value>>())
         .collect::<Vec<Vec<Value>>>();
     let resp_serialised = serde_json::to_string_pretty(&new_messages).unwrap();
     Ok(
diff --git a/refact-agent/engine/src/http/routers/v1/vecdb.rs b/refact-agent/engine/src/http/routers/v1/vecdb.rs
index 9bced7ed4..efa848198 100644
--- a/refact-agent/engine/src/http/routers/v1/vecdb.rs
+++ b/refact-agent/engine/src/http/routers/v1/vecdb.rs
@@ -3,7 +3,6 @@ use axum::Extension;
 use hyper::{Body, Response, StatusCode};
 use serde::{Deserialize, Serialize};
 
-use crate::caps::get_custom_embedding_api_key;
 use crate::custom_error::ScratchError;
 use crate::global_context::SharedGlobalContext;
 use crate::vecdb::vdb_structs::VecdbSearch;
@@ -26,11 +25,9 @@ pub async fn handle_v1_vecdb_search(
         ScratchError::new(StatusCode::BAD_REQUEST, format!("JSON problem: {}", e))
     })?;
 
-    let api_key = get_custom_embedding_api_key(gcx.clone()).await?;
-    let cx_locked = gcx.read().await;
-
-    let search_res = match *cx_locked.vec_db.lock().await {
-        Some(ref db) => db.vecdb_search(post.query.to_string(), post.top_n, None, &api_key).await,
+    let vec_db = gcx.read().await.vec_db.clone();
+    let search_res = match *vec_db.lock().await {
+        Some(ref db) => db.vecdb_search(post.query.to_string(), post.top_n, None).await,
         None => {
             return Err(ScratchError::new(
                 StatusCode::INTERNAL_SERVER_ERROR, NO_VECDB.to_string(),
diff --git a/refact-agent/engine/src/integrations/config_chat.rs b/refact-agent/engine/src/integrations/config_chat.rs
index a4e8e4a8d..9951b4385 100644
--- a/refact-agent/engine/src/integrations/config_chat.rs
+++ b/refact-agent/engine/src/integrations/config_chat.rs
@@ -135,12 +135,7 @@ pub async fn mix_config_messages(
     let custom = crate::yaml_configs::customization_loader::load_customization(gcx.clone(), true, &mut error_log).await;
     // XXX: let model know there are errors
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
 
     let sp: &crate::yaml_configs::customization_loader::SystemPrompt = custom.system_prompts.get("configurator").unwrap();
diff --git a/refact-agent/engine/src/integrations/docker/integr_isolation.rs b/refact-agent/engine/src/integrations/docker/integr_isolation.rs
index 62aa59ba8..547c1a86f 100644
--- a/refact-agent/engine/src/integrations/docker/integr_isolation.rs
+++ b/refact-agent/engine/src/integrations/docker/integr_isolation.rs
@@ -1,6 +1,5 @@
 use std::sync::Arc;
 use serde::{Serialize, Deserialize};
-use serde_inline_default::serde_inline_default;
 use serde_json::Value;
 use async_trait::async_trait;
 use tokio::sync::RwLock as ARwLock;
@@ -10,7 +9,6 @@ use crate::integrations::utils::{serialize_num_to_str, deserialize_str_to_num, s
 use crate::integrations::docker::docker_container_manager::Port;
 use crate::integrations::integr_abstract::{IntegrationTrait, IntegrationCommon};
 
-#[serde_inline_default]
 #[derive(Clone, Serialize, Deserialize, Default, Debug)]
 pub struct SettingsIsolation {
     pub container_workspace_folder: String,
@@ -24,12 +22,14 @@ pub struct SettingsIsolation {
     pub ports: Vec<Port>,
     #[serde(serialize_with = "serialize_num_to_str", deserialize_with = "deserialize_str_to_num")]
     pub keep_containers_alive_for_x_minutes: u64,
-    #[serde_inline_default("sh".to_string())]
+    #[serde(default = "default_docker_entrypoint")]
    pub docker_entrypoint: String,
     #[serde(default)]
     pub docker_extra_params: Vec<String>,
 }
 
+fn default_docker_entrypoint() -> String { "sh".to_string() }
+
 #[derive(Clone, Default)]
 pub struct IntegrationIsolation {
     pub common: IntegrationCommon,
diff --git a/refact-agent/engine/src/integrations/integr_cmdline.rs b/refact-agent/engine/src/integrations/integr_cmdline.rs
index 8795a6dcd..502bcda0c 100644
--- a/refact-agent/engine/src/integrations/integr_cmdline.rs
+++ b/refact-agent/engine/src/integrations/integr_cmdline.rs
@@ -21,7 +21,7 @@ use crate::call_validation::{ChatMessage, ChatContent, ContextEnum};
 use crate::postprocessing::pp_command_output::{CmdlineOutputFilter, output_mini_postprocessing};
 use crate::integrations::integr_abstract::{IntegrationTrait, IntegrationCommon, IntegrationConfirmation};
 use crate::integrations::utils::{serialize_num_to_str, deserialize_str_to_num, serialize_opt_num_to_str, deserialize_str_to_opt_num};
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 
 
 #[derive(Deserialize, Serialize, Clone, Default)]
diff --git a/refact-agent/engine/src/integrations/integr_cmdline_service.rs b/refact-agent/engine/src/integrations/integr_cmdline_service.rs
index 9d2f7c057..e0939adc8 100644
--- a/refact-agent/engine/src/integrations/integr_cmdline_service.rs
+++ b/refact-agent/engine/src/integrations/integr_cmdline_service.rs
@@ -17,7 +17,7 @@ use crate::integrations::process_io_utils::{blocking_read_until_token_or_timeout
 use crate::integrations::sessions::IntegrationSession;
 use crate::integrations::integr_abstract::{IntegrationTrait, IntegrationCommon, IntegrationConfirmation};
 use crate::integrations::integr_cmdline::*;
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 
 const REALLY_HORRIBLE_ROUNDTRIP: u64 = 3000;   // 3000 should be a really bad ping via internet, just in rare case it's a remote port
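The `integr_isolation.rs` hunk above is the whole recipe for dropping the `serde-inline-default` crate: a free function handed to serde's built-in `default` attribute. A tiny self-contained check of the pattern (the trimmed-down `Settings` struct stands in for `SettingsIsolation`):

```rust
use serde::Deserialize;

fn default_entrypoint() -> String { "sh".to_string() }

#[derive(Deserialize, Debug)]
struct Settings {
    // Plain serde: the function is called only when the key is absent.
    #[serde(default = "default_entrypoint")]
    docker_entrypoint: String,
    #[serde(default)] // Vec::default() == empty vec
    docker_extra_params: Vec<String>,
}

fn main() {
    let s: Settings = serde_yaml::from_str("{}").unwrap();
    assert_eq!(s.docker_entrypoint, "sh");
    assert!(s.docker_extra_params.is_empty());
    println!("{:?}", s);
}
```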
diff --git a/refact-agent/engine/src/integrations/integr_shell.rs b/refact-agent/engine/src/integrations/integr_shell.rs
index 90460a053..802a00ad7 100644
--- a/refact-agent/engine/src/integrations/integr_shell.rs
+++ b/refact-agent/engine/src/integrations/integr_shell.rs
@@ -16,7 +16,7 @@ use crate::tools::tools_description::{ToolParam, Tool, ToolDesc, MatchConfirmDen
 use crate::call_validation::{ChatMessage, ChatContent, ContextEnum};
 use crate::postprocessing::pp_command_output::CmdlineOutputFilter;
 use crate::integrations::integr_abstract::{IntegrationCommon, IntegrationTrait};
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 use crate::tools::tools_execute::command_should_be_denied;
diff --git a/refact-agent/engine/src/integrations/process_io_utils.rs b/refact-agent/engine/src/integrations/process_io_utils.rs
index 71c6a6ce8..42860d49c 100644
--- a/refact-agent/engine/src/integrations/process_io_utils.rs
+++ b/refact-agent/engine/src/integrations/process_io_utils.rs
@@ -172,7 +172,7 @@ pub async fn execute_command(mut cmd: Command, timeout_secs: u64, cmd_str: &str)
     let mut child = ChildWithKillOnDrop(child);
     tokio::time::timeout(
-        tokio::time::Duration::from_secs(timeout_secs), 
+        tokio::time::Duration::from_secs(timeout_secs),
         wait_with_output(child.0.inner_mut())
     ).await
     .map_err(|_| format!("command '{cmd_str}' timed out after {timeout_secs} seconds"))?
diff --git a/refact-agent/engine/src/integrations/project_summary_chat.rs b/refact-agent/engine/src/integrations/project_summary_chat.rs
index ed2647e2c..4c24df266 100644
--- a/refact-agent/engine/src/integrations/project_summary_chat.rs
+++ b/refact-agent/engine/src/integrations/project_summary_chat.rs
@@ -18,12 +18,7 @@ pub async fn mix_project_summary_messages(
     let mut error_log = Vec::new();
     let custom = crate::yaml_configs::customization_loader::load_customization(gcx.clone(), true, &mut error_log).await;
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
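The error-logging loops above (and the one in `running_integrations.rs` below) now rely on `YamlError` printing with plain `{e}`, which means the new `crate::custom_error::YamlError` must implement `Display`. That impl is not part of this patch; a plausible reconstruction, assumed from the fields the diff actually uses (`path`, `error_line`, `error_msg`):

```rust
use std::fmt;

pub struct YamlError {
    pub path: String,
    pub error_line: usize, // starts with 1, zero if invalid
    pub error_msg: String,
}

impl fmt::Display for YamlError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Renders roughly what the deleted tracing::error! calls assembled by hand,
        // e.g. "integrations.d/github.yaml:12 mapping values are not allowed here"
        write!(f, "{}:{} {}", self.path, self.error_line, self.error_msg)
    }
}
```

Centralizing the formatting in one `Display` impl collapses three hand-rolled call sites into a single `tracing::error!("{e}")`.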
diff --git a/refact-agent/engine/src/integrations/running_integrations.rs b/refact-agent/engine/src/integrations/running_integrations.rs
index ee5ac33d2..e49676075 100644
--- a/refact-agent/engine/src/integrations/running_integrations.rs
+++ b/refact-agent/engine/src/integrations/running_integrations.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;
 use indexmap::IndexMap;
 use tokio::sync::RwLock as ARwLock;
 
+use crate::custom_error::YamlError;
 use crate::tools::tools_description::Tool;
 use crate::global_context::GlobalContext;
 use crate::integrations::integr_abstract::IntegrationTrait;
@@ -29,14 +30,14 @@ pub async fn load_integration_tools(
 }
 
 /// Loads and set up integrations from config files.
-/// 
-/// If `include_paths_matching` is `None`, all integrations are loaded, 
+///
+/// If `include_paths_matching` is `None`, all integrations are loaded,
 /// otherwise only those matching `include_paths_matching` glob patterns.
 pub async fn load_integrations(
     gcx: Arc<ARwLock<GlobalContext>>,
     allow_experimental: bool,
     include_paths_matching: &[String],
-) -> (IndexMap<String, Box<dyn IntegrationTrait + Send>>, Vec<crate::integrations::setting_up_integrations::YamlError>) {
+) -> (IndexMap<String, Box<dyn IntegrationTrait + Send>>, Vec<YamlError>) {
     let active_project_path = crate::files_correction::get_active_project_path(gcx.clone()).await;
     let (config_dirs, global_config_dir) = crate::integrations::setting_up_integrations::get_config_dirs(gcx.clone(), &active_project_path).await;
     let (integrations_yaml_path, is_inside_container) = {
@@ -44,7 +45,7 @@ pub async fn load_integrations(
         (gcx_locked.cmdline.integrations_yaml.clone(), gcx_locked.cmdline.inside_container)
     };
 
-    let mut error_log: Vec<crate::integrations::setting_up_integrations::YamlError> = Vec::new();
+    let mut error_log: Vec<YamlError> = Vec::new();
 
     let lst: Vec<&str> = crate::integrations::integrations_list(allow_experimental);
     let vars_for_replacements = crate::integrations::setting_up_integrations::get_vars_for_replacements(gcx.clone(), &mut error_log).await;
     let records = crate::integrations::setting_up_integrations::read_integrations_d(
@@ -73,8 +74,8 @@ pub async fn load_integrations(
         let should_be_fine = integr.integr_settings_apply(gcx.clone(), rec.integr_config_path.clone(), &rec.config_unparsed).await;
         if let Err(err) = should_be_fine {
             let error_line = err.line();
-            error_log.push(crate::integrations::setting_up_integrations::YamlError {
-                integr_config_path: rec.integr_config_path.clone(),
+            error_log.push(YamlError {
+                path: rec.integr_config_path.clone(),
                 error_line,
                 error_msg: format!("failed to apply settings: {}", err),
             });
@@ -83,12 +84,7 @@ pub async fn load_integrations(
     }
 
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
 
     (integrations_map, error_log)
diff --git a/refact-agent/engine/src/integrations/setting_up_integrations.rs b/refact-agent/engine/src/integrations/setting_up_integrations.rs
index 1614e6b31..501229b83 100644
--- a/refact-agent/engine/src/integrations/setting_up_integrations.rs
+++ b/refact-agent/engine/src/integrations/setting_up_integrations.rs
@@ -8,29 +8,12 @@ use serde_json::{json, Value};
 use tokio::sync::RwLock as ARwLock;
 use tokio::fs as async_fs;
 use tokio::io::AsyncWriteExt;
+use crate::custom_error::YamlError;
 use crate::global_context::GlobalContext;
 use crate::files_correction::any_glob_matches_path;
 // use crate::tools::tools_description::Tool;
 // use crate::yaml_configs::create_configs::{integrations_enabled_cfg, read_yaml_into_value};
 
-
-#[derive(Serialize, Default)]
-pub struct YamlError {
-    pub integr_config_path: String,
-    pub error_line: usize,   // starts with 1, zero if invalid
-    pub error_msg: String,
-}
-
-impl From<(&str, &serde_yaml::Error)> for YamlError {
-    fn from((path, err): (&str, &serde_yaml::Error)) -> Self {
-        YamlError {
-            integr_config_path: path.to_string(),
-            error_line: err.location().map(|loc| loc.line()).unwrap_or(0),
-            error_msg: err.to_string(),
-        }
-    }
-}
-
 #[derive(Serialize, Default, Debug, Clone)]
 pub struct IntegrationRecord {
     pub project_path: String,
@@ -120,7 +103,7 @@ pub fn read_integrations_d(
             Err(e) => {
                 tracing::warn!("failed to read {}: {}", integrations_yaml_path, e);
                 error_log.push(YamlError {
-                    integr_config_path: integrations_yaml_path.clone(),
+                    path: integrations_yaml_path.clone(),
                     error_line: 0,
                     error_msg: e.to_string(),
                 });
@@ -213,7 +196,7 @@ pub fn read_integrations_d(
             },
             Err(e) => {
                 error_log.push(YamlError {
-                    integr_config_path: path_str.to_string(),
+                    path: path_str.to_string(),
                     error_line: 0,
                     error_msg: e.to_string(),
                 });
@@ -353,7 +336,7 @@ pub async fn get_vars_for_replacements(
             Err(e) => {
                 tracing::warn!("Failed to parse {}: {}", path.display(), e);
                 error_log.push(YamlError {
-                    integr_config_path: path.to_string_lossy().to_string(),
+                    path: path.to_string_lossy().to_string(),
                     error_line: e.location().map(|loc| loc.line()).unwrap_or(0),
                     error_msg: format!("Failed to parse {}: {}", path.display(), e),
                 });
@@ -363,7 +346,7 @@ pub async fn get_vars_for_replacements(
             Err(e) => {
                 tracing::info!("Failed to read {}: {}", path.display(), e);
                 error_log.push(YamlError {
-                    integr_config_path: path.to_string_lossy().to_string(),
+                    path: path.to_string_lossy().to_string(),
                     error_line: 0,
                     error_msg: format!("Failed to read {}: {}", path.display(), e),
                 });
@@ -516,7 +499,7 @@ pub async fn integration_config_get(
         Ok(_) => {}
         Err(err) => {
             result.error_log.push(YamlError {
-                integr_config_path: better_integr_config_path.clone(),
+                path: better_integr_config_path.clone(),
                 error_line: err.line(),
                 error_msg: err.to_string(),
             });
@@ -532,7 +515,7 @@ pub async fn integration_config_get(
         }
         Err(err) => {
             result.error_log.push(YamlError {
-                integr_config_path: better_integr_config_path.clone(),
+                path: better_integr_config_path.clone(),
                 error_line: err.location().map(|loc| loc.line()).unwrap_or(0),
                 error_msg: err.to_string(),
             });
diff --git a/refact-agent/engine/src/knowledge.rs b/refact-agent/engine/src/knowledge.rs
index 64105e573..74e8e4f7f 100644
--- a/refact-agent/engine/src/knowledge.rs
+++ b/refact-agent/engine/src/knowledge.rs
@@ -242,7 +242,7 @@ impl MemoriesDatabase {
             | rusqlite::OpenFlags::SQLITE_OPEN_URI,
         ).await.map_err(|err| format!("Failed to open database: {}", err))?;
         setup_db(&conn, pubsub_notifier.clone()).await?;
-        migrate_202501(&conn, constants.embedding_size, emb_table_name.clone(), reset_memory).await?;
+        migrate_202501(&conn, constants.embedding_model.embedding_size, emb_table_name.clone(), reset_memory).await?;
         crate::vecdb::vdb_emb_aux::cleanup_old_emb_tables(&conn, 7, 10).await?;
 
         let db = MemoriesDatabase {
@@ -504,7 +504,6 @@ pub async fn vectorize_dirty_memories(
     vecdb_handler: Arc>,
     _status: Arc>,
     client: Arc>,
-    api_key: &String,
     #[allow(non_snake_case)]
     B: usize,
 ) -> rusqlite::Result<(), String> {
@@ -525,13 +524,10 @@ pub async fn vectorize_dirty_memories(
     let my_constants: VecdbConstants = memdb.lock().await.vecdb_constants.clone();
     for chunk in to_vectorize.chunks_mut(B) {
         let texts: Vec<String> = chunk.iter().map(|x| x.window_text.clone()).collect();
-        let embedding_mb = crate::fetch_embedding::get_embedding_with_retry(
+        let embedding_mb = crate::fetch_embedding::get_embedding_with_retries(
             client.clone(),
-            &my_constants.endpoint_embeddings_style,
             &my_constants.embedding_model,
-            &my_constants.endpoint_embeddings_template,
             texts,
-            api_key,
             1,
         ).await?;
         for (chunk_save, x) in chunk.iter_mut().zip(embedding_mb.iter()) {
diff --git a/refact-agent/engine/src/known_models.json b/refact-agent/engine/src/known_models.json
new file mode 100644
index 000000000..0967c6c38
--- /dev/null
+++ b/refact-agent/engine/src/known_models.json
@@ -0,0 +1,964 @@
+{
+    "completion_models": {
+        "starcoder": {
+            "n_ctx": 4096,
+            "scratchpad_patch": {
+                "context_format": "starcoder",
+                "rag_ratio": 0.5
+            },
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://bigcode/starcoder2-3b",
+            "similar_models": [
+                "bigcode/starcoder",
+                "bigcode/starcoderbase",
+                "starcoder/15b/base",
+                "starcoder/15b/plus",
+                "starcoder/1b/base",
+                "starcoder/3b/base",
+                "starcoder/7b/base",
+                "wizardcoder/15b",
+                "starcoder/1b/vllm",
+                "starcoder/3b/vllm",
"starcoder/7b/vllm", + "starcoder2/3b/base", + "starcoder2/7b/base", + "starcoder2/15b/base", + "starcoder2/3b/vllm", + "starcoder2/7b/vllm", + "starcoder2/15b/vllm", + "starcoder2/3b/neuron", + "starcoder2/7b/neuron", + "starcoder2/15b/neuron", + "starcoder2/3b", + "starcoder2/7b", + "starcoder2/15b", + "bigcode/starcoder2-3b", + "bigcode/starcoder2-7b", + "bigcode/starcoder2-15b" + ] + }, + "smallcloudai/Refact-1_6B-fim": { + "n_ctx": 4096, + "tokenizer": "hf://smallcloudai/Refact-1_6B-fim", + "scratchpad": "FIM-SPM", + "similar_models": [ + "Refact/1.6B", + "Refact/1.6B/vllm" + ] + }, + "codellama/CodeLlama-13b-hf": { + "n_ctx": 4096, + "scratchpad_patch": { + "fim_prefix": "
",
+                "fim_suffix": "<SUF>",
+                "fim_middle": "<MID>",
+                "eot": "<EOT>",
+                "eos": "</s>"
+            },
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://codellama/CodeLlama-13b-hf",
+            "similar_models": [
+                "codellama/7b"
+            ]
+        },
+        "deepseek-coder": {
+            "n_ctx": 4096,
+            "scratchpad_patch": {
+                "fim_prefix": "<|fim▁begin|>",
+                "fim_suffix": "<|fim▁hole|>",
+                "fim_middle": "<|fim▁end|>",
+                "eot": "<|EOT|>"
+            },
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-1.3b-base",
+            "similar_models": [
+                "deepseek-coder/1.3b/base",
+                "deepseek-coder/5.7b/mqa-base",
+                "deepseek-coder/1.3b/vllm",
+                "deepseek-coder/5.7b/vllm",
+                "deepseek-ai/deepseek-coder-1.3b-base"
+            ]
+        },
+        "stable/3b/code": {
+            "n_ctx": 4096,
+            "scratchpad": "FIM-PSM",
+            "tokenizer": "hf://stabilityai/stable-code-3b",
+            "similar_models": []
+        },
+        "llama3/8b/instruct": {
+            "n_ctx": 8192,
+            "scratchpad_patch": {
+                "token_bos": "<|begin_of_text|>",
+                "token_esc": "<|eot_id|>",
+                "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
+                "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
+                "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+                "eot": "<|eot_id|>",
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "scratchpad": "REPLACE",
+            "tokenizer": "hf://Xenova/llama3-tokenizer",
+            "similar_models": [
+                "llama3/8b/instruct/neuron",
+                "llama3.1/8b/instruct",
+                "llama3.2/3b/instruct",
+                "llama3.2/1b/instruct"
+            ]
+        },
+        "deepseek-coder/6.7b/instruct-finetune/vllm": {
+            "n_ctx": 4096,
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-6.7b-instruct",
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            }
+        },
+        "llama3/8b/instruct/vllm": {
+            "n_ctx": 8192,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/llama3-tokenizer",
+            "similar_models": [
+                "llama3.1/8b/instruct/vllm"
+            ]
+        },
+        "llama3.2/1b/instruct/vllm": {
+            "n_ctx": 16384,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://meta-llama/llama-3.2-1b-instruct",
+            "similar_models": [
+                "llama3.2/3b/instruct/vllm"
+            ]
+        },
+        "qwen2.5/coder/1.5b/instruct/vllm": {
+            "n_ctx": 32768,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/3b/instruct/vllm",
+                "qwen2.5/coder/7b/instruct/vllm",
+                "qwen2.5/coder/14b/instruct/vllm",
+                "qwen2.5/coder/32b/instruct/vllm",
+                "qwen2.5/7b/instruct/vllm",
+                "qwen2.5/14b/instruct/vllm",
+                "qwen2.5/32b/instruct/vllm"
+            ]
+        },
+        "gpt-4o": {
+            "n_ctx": 128000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4o-2024-05-13",
+                "gpt-4o-2024-08-06",
+                "gpt-4o-mini",
+                "gpt-4o-mini-2024-07-18",
+                "chatgpt-4o",
+                "openai/gpt-4o",
+                "openai/gpt-4o-2024-05-13",
+                "openai/gpt-4o-2024-08-06",
+                "openai/gpt-4o-mini",
+                "openai/gpt-4o-mini-2024-07-18",
+                "openai/chatgpt-4o"
+            ]
+        },
+        "claude-3-sonnet": {
+            "n_ctx": 200000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/claude-tokenizer",
+            "similar_models": [
+                "claude-3-haiku",
+                "claude-3-5-haiku",
+                "claude-3-5-haiku-20241022",
+                "claude-3-opus",
+                "claude-3-5-sonnet",
+                "claude-3-5-sonnet-20241022",
+                "claude-3-7-sonnet",
+                "claude-3-7-sonnet-20250219",
+                "anthropic/claude-3-sonnet",
+                "anthropic/claude-3-haiku",
+                "anthropic/claude-3-5-haiku",
+                "anthropic/claude-3-5-haiku-20241022",
+                "anthropic/claude-3-opus",
+                "anthropic/claude-3-5-sonnet",
+                "anthropic/claude-3-5-sonnet-20241022",
+                "anthropic/claude-3-7-sonnet",
+                "anthropic/claude-3-7-sonnet-20250219"
+            ]
+        },
+        "groq-llama-3.1-8b": {
+            "n_ctx": 128000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/Meta-Llama-3.1-Tokenizer",
+            "similar_models": [
+                "groq-llama-3.1-70b",
+                "groq-llama-3.2-1b",
+                "groq-llama-3.2-3b",
+                "groq-llama-3.2-11b-vision",
+                "groq-llama-3.2-90b-vision"
+            ]
+        },
+        "cerebras-llama3.1-8b": {
+            "n_ctx": 8192,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/Meta-Llama-3.1-Tokenizer",
+            "similar_models": [
+                "cerebras-llama3.1-70b"
+            ]
+        },
+        "gemini-2.0-flash-exp": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "gemini-1.5-flash",
+                "gemini-1.5-flash-8b"
+            ]
+        },
+        "gemini-1.5-pro": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "gemini-2.0-exp-advanced",
+                "gemini-2.5-pro"
+            ]
+        },
+        "grok-beta": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/grok-1-tokenizer",
+            "similar_models": [
+                "grok-2-1212",
+                "grok-2"
+            ]
+        },
+        "grok-vision-beta": {
+            "n_ctx": 8192,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2-vision-1212": {
+            "n_ctx": 32000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Xenova/grok-1-tokenizer",
+            "similar_models": [
+                "grok-2-vision"
+            ]
+        },
+        "deepseek-chat": {
+            "n_ctx": 64000,
+            "scratchpad": "REPLACE_PASSTHROUGH",
+            "scratchpad_patch": {
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://deepseek-ai/DeepSeek-V3"
+        },
+        "qwen2.5/coder/0.5b/instruct": {
+            "n_ctx": 8192,
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "<|im_start|>system\n",
+                "keyword_user": "<|im_start|>user\n",
+                "keyword_assistant": "<|im_start|>assistant\n",
+                "eot": "<|im_end|>",
+                "context_format": "chat",
+                "rag_ratio": 0.5
+            },
+            "scratchpad": "REPLACE",
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-0.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/1.5b/instruct",
+                "qwen2.5/coder/3b/instruct",
+                "qwen2.5/coder/7b/instruct/gptq8bit",
+                "qwen2.5/coder/7b/instruct",
+                "qwen2.5/coder/14b/instruct/gptq8bit",
+                "qwen2.5/coder/14b/instruct",
+                "qwen2.5/coder/32b/instruct/gptq8bit",
+                "qwen2.5/coder/32b/instruct"
+            ]
+        },
+        "qwen2.5-coder-base": {
+            "n_ctx": 8192,
+            "scratchpad_patch": {
+                "fim_prefix": "<|fim_prefix|>",
+                "fim_suffix": "<|fim_suffix|>",
+                "fim_middle": "<|fim_middle|>",
+                "eot": "<|endoftext|>",
+                "extra_stop_tokens": [
+                    "<|repo_name|>",
+                    "<|file_sep|>",
+                    "<|fim_pad|>",
+                    "<|cursor|>"
+                ],
+                "context_format": "qwen2.5",
+                "rag_ratio": 0.5
+            },
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-0.5B",
+            "scratchpad": "FIM-PSM",
+            "similar_models": [
+                "qwen2.5/coder/0.5b/base",
+                "qwen2.5/coder/1.5b/base",
+                "qwen2.5/coder/3b/base",
+                "qwen2.5/coder/7b/base",
+                "qwen2.5/coder/14b/base",
+                "qwen2.5/coder/32b/base",
+                "qwen2.5/coder/0.5b/base/vllm",
+                "qwen2.5/coder/1.5b/base/vllm",
+                "qwen2.5/coder/3b/base/vllm",
+                "qwen2.5/coder/7b/base/vllm",
+                "qwen2.5/coder/14b/base/vllm",
+                "qwen2.5/coder/32b/base/vllm"
+            ]
+        }
+    },
+    "chat_models": {
+        "gpt-4o": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4o-2024-05-13",
+                "gpt-4o-2024-08-06",
+                "openai/gpt-4o",
+                "openai/gpt-4o-2024-05-13",
+                "openai/gpt-4o-2024-08-06"
+            ]
+        },
+        "gpt-4o-mini": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "gpt-4o-mini-2024-07-18",
+                "openai/gpt-4o-mini",
+                "openai/gpt-4o-mini-2024-07-18"
+            ],
+            "tokenizer": "hf://Xenova/gpt-4o"
+        },
+        "o1": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_reasoning": "openai",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/o1"
+            ]
+        },
+        "o1-mini": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_reasoning": "openai",
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/o1-mini"
+            ]
+        },
+        "o3-mini": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "supports_agent": true,
+            "supports_reasoning": "openai",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/o3-mini"
+            ]
+        },
+        "gpt-4.5-preview": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "openai/gpt-4.5-preview"
+            ]
+        },
+        "gpt-4.1": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4.1-2025-04-14",
+                "openai/gpt-4.1",
+                "openai/gpt-4.1-2025-04-14"
+            ]
+        },
+        "gpt-4.1-mini": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "gpt-4.1-nano",
+                "gpt-4.1-mini-2025-04-14",
+                "gpt-4.1-nano-2025-04-14",
+                "openai/gpt-4.1-mini",
+                "openai/gpt-4.1-nano",
+                "openai/gpt-4.1-mini-2025-04-14",
+                "openai/gpt-4.1-nano-2025-04-14"
+            ]
+        },
+        "chatgpt-4o": {
+            "n_ctx": 128000,
+            "supports_tools": false,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gpt-4o",
+            "similar_models": [
+                "chatgpt-4o-latest",
+                "openai/chatgpt-4o",
+                "openai/chatgpt-4o-latest"
+            ]
+        },
+        "claude-instant-1.2": {
+            "n_ctx": 8096,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "claude-2.1",
+                "claude-3-haiku",
+                "claude-3-opus",
+                "claude-3-sonnet",
+                "anthropic/claude-instant-1.2",
+                "anthropic/claude-2.1",
+                "anthropic/claude-3-haiku",
+                "anthropic/claude-3-opus",
+                "anthropic/claude-3-sonnet"
+            ],
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "claude-3-5-sonnet": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/claude-tokenizer",
+            "similar_models": [
+                "claude-3-5-sonnet-20240620",
+                "claude-3-5-sonnet-latest",
+                "anthropic/claude-3-5-sonnet-20240620",
+                "anthropic/claude-3.5-sonnet",
+                "anthropic/claude-3-5-sonnet-latest"
+            ]
+        },
+        "claude-3-5-sonnet-20241022": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_clicks": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "claude-3-5-haiku": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "claude-3-5-haiku-20241022",
+                "claude-3-5-haiku-latest",
+                "anthropic/claude-3-5-haiku",
+                "anthropic/claude-3-5-haiku-latest"
+            ],
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "claude-3-7-sonnet": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_clicks": true,
+            "supports_agent": true,
+            "supports_reasoning": "anthropic",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "claude-3-7-sonnet-20250219",
+                "claude-3-7-sonnet-latest",
+                "anthropic/claude-3.7-sonnet",
+                "anthropic/claude-3-7-sonnet",
+                "anthropic/claude-3.7-sonnet-latest",
+                "anthropic/claude-3-7-sonnet-latest"
+            ],
+            "tokenizer": "hf://Xenova/claude-tokenizer"
+        },
+        "gemini-2.5-pro": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "gemini-2.5-pro-exp-03-25",
+                "gemini-2.5-pro-preview-03-25"
+            ]
+        },
+        "gemini-2.0-flash": {
+            "n_ctx": 200000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "models/gemini-2.0-flash"
+            ]
+        },
+        "gemini-2.0-flash-exp": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "gemini-1.5-flash",
+                "gemini-1.5-flash-8b"
+            ],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        },
+        "gemini-2.0-flash-lite": {
+            "n_ctx": 200000,
+            "supports_tools": false,
+            "supports_multimodality": true,
+            "supports_agent": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/gemma2-tokenizer",
+            "similar_models": [
+                "models/gemini-2.0-flash-lite"
+            ]
+        },
+        "gemini-1.5-pro": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "gemini-2.0-exp-advanced"
+            ],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        },
+        "llama3/8b/instruct": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
+                "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
+                "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+                "eot": "<|eot_id|>",
+                "stop_list": [
+                    "<|eot_id|>"
+                ]
+            },
+            "tokenizer": "hf://Xenova/llama3-tokenizer",
+            "similar_models": [
+                "llama3/8b/instruct/neuron",
+                "meta-llama/llama-3.1-8b-instruct",
+                "llama3.1/8b/instruct",
+                "llama3.2/3b/instruct",
+                "llama3.2/1b/instruct"
+            ]
+        },
+        "deepseek-coder/6.7b/instruct-finetune/vllm": {
+            "n_ctx": 4096,
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-6.7b-instruct",
+            "scratchpad": "PASSTHROUGH"
+        },
+        "llama3/8b/instruct/vllm": {
+            "n_ctx": 8192,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://meta-llama/Meta-Llama-3-8B-Instruct",
+            "similar_models": [
+                "llama3.1/8b/instruct/vllm"
+            ]
+        },
+        "llama3.2/1b/instruct/vllm": {
+            "n_ctx": 16384,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://meta-llama/Llama-3.2-1B-Instruct",
+            "similar_models": [
+                "llama3.2/3b/instruct/vllm",
+                "llama3.3/70b/instruct/vllm"
+            ]
+        },
+        "mistral/24b/instruct/vllm": {
+            "n_ctx": 16384,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://mistralai/Mistral-Small-24B-Instruct-2501",
+            "similar_models": []
+        },
+        "qwen2.5/coder/1.5b/instruct/vllm": {
+            "n_ctx": 32768,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-1.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/3b/instruct/vllm",
+                "qwen2.5/coder/7b/instruct/vllm",
+                "qwen2.5/coder/14b/instruct/vllm",
+                "qwen2.5/coder/32b/instruct/vllm"
+            ]
+        },
+        "qwen2.5/7b/instruct/vllm": {
+            "n_ctx": 32768,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/Qwen2.5-7B-Instruct",
+            "similar_models": [
+                "qwen2.5/14b/instruct/vllm",
+                "qwen2.5/32b/instruct/vllm"
+            ]
+        },
+        "qwen-qwq/32b/vllm": {
+            "n_ctx": 32768,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/QwQ-32B",
+            "similar_models": [
+                "qwen-qwq/32b/awq/vllm"
+            ]
+        },
+        "qwen-qwq-32b": {
+            "n_ctx": 131072,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Qwen/QwQ-32B",
+            "similar_models": [
+                "groq/qwen-qwq-32b"
+            ]
+        },
+        "wizardlm/7b": {
+            "n_ctx": 2048,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "\nUSER: ",
+                "keyword_assistant": "\nASSISTANT: ",
+                "eot": "</s>",
+                "stop_list": [
+                    "\n\n"
+                ]
+            },
+            "tokenizer": "hf://cognitivecomputations/WizardLM-7B-Uncensored",
+            "similar_models": [
+                "wizardlm/13b",
+                "wizardlm/30b"
+            ]
+        },
+        "magicoder/6.7b": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "\n@@ Instruction\n",
+                "keyword_assistant": "\n@@ Response\n",
+                "stop_list": [],
+                "eot": "<|EOT|>"
+            },
+            "tokenizer": "hf://ise-uiuc/Magicoder-S-DS-6.7B"
+        },
+        "mistral/7b/instruct-v0.1": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "[INST] ",
+                "keyword_assistant": "[/INST]\n",
+                "stop_list": [],
+                "eot": "</s>"
+            },
+            "tokenizer": "hf://mistralai/Mistral-7B-Instruct-v0.1",
+            "similar_models": [
+                "mixtral/8x7b/instruct-v0.1"
+            ]
+        },
+        "phind/34b/v2": {
+            "n_ctx": 4095,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "### System Prompt\n",
+                "keyword_user": "\n### User Message\n",
+                "keyword_assistant": "\n### Assistant\n",
+                "stop_list": [],
+                "eot": "</s>"
+            },
+            "tokenizer": "hf://Phind/Phind-CodeLlama-34B-v2"
+        },
+        "deepseek-coder/6.7b/instruct": {
+            "n_ctx": 4096,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "",
+                "keyword_user": "### Instruction:\n",
+                "keyword_assistant": "### Response:\n",
+                "stop_list": [],
+                "eot": "<|EOT|>"
+            },
+            "tokenizer": "hf://deepseek-ai/deepseek-coder-6.7b-instruct",
+            "similar_models": [
+                "deepseek-coder/33b/instruct",
+                "deepseek-coder/6.7b/instruct-finetune"
+            ]
+        },
+        "groq-llama-3.1-8b": {
+            "n_ctx": 128000,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "similar_models": [
+                "groq-llama-3.1-70b",
+                "groq-llama-3.2-1b",
+                "groq-llama-3.2-3b",
+                "groq-llama-3.2-11b-vision",
+                "groq-llama-3.2-90b-vision"
+            ]
+        },
+        "cerebras-llama3.1-8b": {
+            "n_ctx": 8192,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/Meta-Llama-3.1-Tokenizer",
+            "similar_models": [
+                "cerebras-llama3.1-70b"
+            ]
+        },
+        "grok-3-beta": {
+            "n_ctx": 131072,
+            "supports_tools": true,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-3-mini-beta": {
+            "n_ctx": 131072,
+            "supports_tools": true,
+            "supports_agent": true,
+            "supports_reasoning": "openai",
+            "supports_boost_reasoning": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-beta": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-vision-beta": {
+            "n_ctx": 8192,
+            "supports_tools": false,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2-vision-1212": {
+            "n_ctx": 32000,
+            "supports_tools": true,
+            "supports_multimodality": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2-1212": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "grok-2": {
+            "n_ctx": 128000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://Xenova/grok-1-tokenizer"
+        },
+        "deepseek-chat": {
+            "n_ctx": 64000,
+            "supports_tools": true,
+            "supports_multimodality": false,
+            "supports_agent": true,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://deepseek-ai/DeepSeek-V3"
+        },
+        "deepseek-reasoner": {
+            "n_ctx": 64000,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "supports_reasoning": "deepseek",
+            "default_temperature": 0.6,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://deepseek-ai/DeepSeek-R1"
+        },
+        "qwen2.5/coder/0.5b/instruct": {
+            "n_ctx": 8192,
+            "supports_tools": false,
+            "supports_multimodality": false,
+            "scratchpad": "CHAT-GENERIC",
+            "scratchpad_patch": {
+                "token_bos": "",
+                "token_esc": "",
+                "keyword_system": "<|im_start|>system\n",
+                "keyword_user": "<|im_start|>user\n",
+                "keyword_assistant": "<|im_start|>assistant\n",
+                "eot": "<|im_end|>",
+                "stop_list": [
+                    "<|im_end|>"
+                ]
+            },
+            "tokenizer": "hf://Qwen/Qwen2.5-Coder-0.5B-Instruct",
+            "similar_models": [
+                "qwen2.5/coder/1.5b/instruct",
+                "qwen2.5/coder/3b/instruct",
+                "qwen2.5/coder/7b/instruct/gptq8bit",
+                "qwen2.5/coder/7b/instruct",
+                "qwen2.5/coder/14b/instruct/gptq8bit",
+                "qwen2.5/coder/14b/instruct",
+                "qwen2.5/coder/32b/instruct/gptq8bit",
+                "qwen2.5/coder/32b/instruct"
+            ]
+        },
+        "deepseek-r1-distill/1.5b/vllm": {
+            "n_ctx": 32768,
+            "supports_reasoning": "deepseek",
+            "default_temperature": 0.6,
+            "scratchpad": "PASSTHROUGH",
+            "tokenizer": "hf://deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+            "similar_models": [
+                "deepseek-r1-distill/7b/vllm",
+                "deepseek-r1-distill/8b/vllm",
+                "deepseek-r1-distill/14b/vllm",
+                "deepseek-r1-distill/32b/vllm",
+                "deepseek-r1-distill/70b/vllm"
+            ]
+        }
+    },
+    "embedding_models": {
+        "thenlper/gte-base": {
+            "n_ctx": 512,
+            "embedding_size": 768,
+            "rejection_threshold": 0.25,
+            "tokenizer": "hf://thenlper/gte-base"
+        },
+        "text-embedding-3-small": {
+            "n_ctx": 8191,
+            "embedding_size": 1536,
+            "rejection_threshold": 0.63,
+            "similar_models": [
+                "openai/text-embedding-3-small"
+            ],
+            "tokenizer": "hf://Xenova/text-embedding-ada-002"
+        },
+        "gemini-embedding-exp": {
+            "n_ctx": 8192,
+            "embedding_size": 3072,
+            "similar_models": [
+                "gemini-embedding-exp-03-07"
+            ],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        },
+        "models/text-embedding-004": {
+            "n_ctx": 2048,
+            "embedding_size": 768,
+            "similar_models": [],
+            "tokenizer": "hf://Xenova/gemma2-tokenizer"
+        }
+    },
+    "comments": [
+        "gemini and gemma bear the same tokenizer",
+        "according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677",
+        "downloadable tokenizer.json does not exist for gemini, the only precise way is to use web-requests",
+
+        "XAI WARNING: tokenizer is non-precise as there's no publicly available tokenizer for these models",
+        "XAI says that for exact same model different tokenizers could be used",
+        "therefore, using tokenizer for grok-1 which may or may not provide proximate enough results"
+    ]
+}
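With the model table now shipped as JSON, the natural consumption path for `get_known_models()` (referenced by the providers handler earlier in this patch) is to embed and parse the file once. A sketch under stated assumptions: the engine's real deserialization structs are richer than this, and `KnownModels`, the field subset, and the `OnceLock` caching below are illustrative only:

```rust
use std::collections::HashMap;
use std::sync::OnceLock;

use serde::Deserialize;

#[derive(Deserialize)]
struct KnownCompletionModel {
    n_ctx: usize,
    #[serde(default)]
    scratchpad: Option<String>,
    #[serde(default)]
    scratchpad_patch: Option<serde_json::Value>,
    #[serde(default)]
    tokenizer: Option<String>,
    // Aliases that resolve to this record, e.g. "starcoder2/3b" -> "starcoder".
    #[serde(default)]
    similar_models: Vec<String>,
}

#[derive(Deserialize)]
struct KnownModels {
    completion_models: HashMap<String, KnownCompletionModel>,
    chat_models: HashMap<String, serde_json::Value>,
    embedding_models: HashMap<String, serde_json::Value>,
}

fn get_known_models() -> &'static KnownModels {
    static CELL: OnceLock<KnownModels> = OnceLock::new();
    CELL.get_or_init(|| {
        // The JSON travels inside the binary, replacing the old raw-string constant.
        serde_json::from_str(include_str!("known_models.json"))
            .expect("known_models.json must parse")
    })
}
```

One behavioral point visible in the data itself: lookups have to consult `similar_models` as well as the top-level keys, since most deployment-specific ids (vllm, neuron, provider-prefixed names) appear only as aliases.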
diff --git a/refact-agent/engine/src/known_models.rs b/refact-agent/engine/src/known_models.rs
deleted file mode 100644
index e81e8186a..000000000
--- a/refact-agent/engine/src/known_models.rs
+++ /dev/null
@@ -1,904 +0,0 @@
-pub const KNOWN_MODELS: &str = r####"
-{
-    "code_completion_models": {
-        "bigcode/starcoder": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "context_format": "starcoder",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "bigcode/starcoderbase",
-                "starcoder/15b/base",
-                "starcoder/15b/plus",
-                "starcoder/1b/base",
-                "starcoder/3b/base",
-                "starcoder/7b/base",
-                "wizardcoder/15b",
-                "starcoder/1b/vllm",
-                "starcoder/3b/vllm",
-                "starcoder/7b/vllm",
-                "starcoder2/3b/base",
-                "starcoder2/7b/base",
-                "starcoder2/15b/base",
-                "starcoder2/3b/vllm",
-                "starcoder2/7b/vllm",
-                "starcoder2/15b/vllm",
-                "starcoder2/3b/neuron",
-                "starcoder2/7b/neuron",
-                "starcoder2/15b/neuron",
-                "starcoder2/3b",
-                "starcoder2/7b",
-                "starcoder2/15b",
-                "bigcode/starcoder2-3b",
-                "bigcode/starcoder2-7b",
-                "bigcode/starcoder2-15b"
-            ]
-        },
-        "smallcloudai/Refact-1_6B-fim": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-SPM": { }
-            },
-            "default_scratchpad": "FIM-SPM",
-            "similar_models": [
-                "Refact/1.6B",
-                "Refact/1.6B/vllm"
-            ]
-        },
-        "codellama/CodeLlama-13b-hf": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "fim_prefix": "<PRE>",
-                    "fim_suffix": "",
-                    "fim_middle": "",
-                    "eot": "",
-                    "eos": ""
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "codellama/7b"
-            ]
-        },
-        "deepseek-coder/1.3b/base": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "fim_prefix": "<|fim▁begin|>",
-                    "fim_suffix": "<|fim▁hole|>",
-                    "fim_middle": "<|fim▁end|>",
-                    "eot": "<|EOT|>"
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "deepseek-coder/5.7b/mqa-base",
-                "deepseek-coder/1.3b/vllm",
-                "deepseek-coder/5.7b/vllm"
-            ]
-        },
-        "stable/3b/code": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "FIM-PSM": {}
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": []
-        },
-        "llama3/8b/instruct": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE": {
-                    "token_bos": "<|begin_of_text|>",
-                    "token_esc": "<|eot_id|>",
-                    "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
-                    "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
-                    "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
-                    "eot": "<|eot_id|>",
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "REPLACE",
-            "similar_models": [
-                "llama3/8b/instruct/neuron",
-                "llama3.1/8b/instruct",
-                "llama3.2/3b/instruct",
-                "llama3.2/1b/instruct"
-            ]
-        },
-        "deepseek-coder/6.7b/instruct-finetune/vllm": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            }
-        },
-        "llama3/8b/instruct/vllm": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "llama3.1/8b/instruct/vllm"
-            ]
-        },
-        "llama3.2/1b/instruct/vllm": {
-            "n_ctx": 16384,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "llama3.2/3b/instruct/vllm"
-            ]
-        },
-        "qwen2.5/coder/1.5b/instruct/vllm": {
-            "n_ctx": 32768,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "qwen2.5/coder/3b/instruct/vllm",
-                "qwen2.5/coder/7b/instruct/vllm",
-                "qwen2.5/coder/14b/instruct/vllm",
-                "qwen2.5/coder/32b/instruct/vllm",
-                "qwen2.5/7b/instruct/vllm",
-                "qwen2.5/14b/instruct/vllm",
-                "qwen2.5/32b/instruct/vllm"
-            ]
-        },
-        "gpt-4o": {
-            "n_ctx": 128000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "gpt-4o-2024-05-13",
-                "gpt-4o-2024-08-06",
-                "openai/gpt-4o",
-                "gpt-4o-mini",
-                "gpt-4o-mini-2024-07-18",
-                "chatgpt-4o"
-            ]
-        },
-        "claude-3-sonnet": {
-            "n_ctx": 200000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "claude-3-haiku",
-                "claude-3-5-haiku",
-                "claude-3-5-haiku-20241022",
-                "claude-3-opus",
-                "claude-3-5-sonnet",
-                "claude-3-5-sonnet-20241022",
-                "claude-3-7-sonnet",
-                "claude-3-7-sonnet-20250219"
-            ]
-        },
-        "groq-llama-3.1-8b": {
-            "n_ctx": 128000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "groq-llama-3.1-70b",
-                "groq-llama-3.2-1b",
-                "groq-llama-3.2-3b",
-                "groq-llama-3.2-11b-vision",
-                "groq-llama-3.2-90b-vision"
-            ]
-        },
-        "cerebras-llama3.1-8b": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "cerebras-llama3.1-70b"
-            ]
-        },
-        "gemini-2.0-flash-exp": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-1.5-flash",
-                "gemini-1.5-flash-8b"
-            ]
-        },
-        "gemini-1.5-pro": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-2.0-exp-advanced",
-                "gemini-2.5-pro"
-            ]
-        },
-        "grok-beta": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "grok-2-1212",
-                "grok-2"
-            ]
-        },
-        "grok-vision-beta": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            }
-        },
-        "grok-2-vision-1212": {
-            "n_ctx": 32000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "similar_models": [
-                "grok-2-vision"
-            ]
-        },
-        "deepseek-chat": {
-            "n_ctx": 64000,
-            "supports_scratchpads": {
-                "REPLACE_PASSTHROUGH": {
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            }
-        },
-        "qwen2.5/coder/0.5b/instruct": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "REPLACE": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "<|im_start|>system\n",
-                    "keyword_user": "<|im_start|>user\n",
-                    "keyword_assistant": "<|im_start|>assistant\n",
-                    "eot": "<|im_end|>",
-                    "context_format": "chat",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "REPLACE",
-            "similar_models": [
-                "qwen2.5/coder/1.5b/instruct",
-                "qwen2.5/coder/3b/instruct",
-                "qwen2.5/coder/7b/instruct/gptq8bit",
-                "qwen2.5/coder/7b/instruct",
-                "qwen2.5/coder/14b/instruct/gptq8bit",
-                "qwen2.5/coder/14b/instruct",
-                "qwen2.5/coder/32b/instruct/gptq8bit",
-                "qwen2.5/coder/32b/instruct"
-            ]
-        },
-        "qwen2.5/coder/0.5b/base": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "FIM-PSM": {
-                    "fim_prefix": "<|fim_prefix|>",
-                    "fim_suffix": "<|fim_suffix|>",
-                    "fim_middle": "<|fim_middle|>",
-                    "eot": "<|endoftext|>",
-                    "extra_stop_tokens": ["<|repo_name|>", "<|file_sep|>", "<|fim_pad|>", "<|cursor|>"],
-                    "context_format": "qwen2.5",
-                    "rag_ratio": 0.5
-                }
-            },
-            "default_scratchpad": "FIM-PSM",
-            "similar_models": [
-                "qwen2.5/coder/1.5b/base",
-                "qwen2.5/coder/3b/base",
-                "qwen2.5/coder/7b/base",
-                "qwen2.5/coder/14b/base",
-                "qwen2.5/coder/32b/base",
-                "qwen2.5/coder/0.5b/base/vllm",
-                "qwen2.5/coder/1.5b/base/vllm",
-                "qwen2.5/coder/3b/base/vllm",
-                "qwen2.5/coder/7b/base/vllm",
-                "qwen2.5/coder/14b/base/vllm",
-                "qwen2.5/coder/32b/base/vllm"
-            ]
-        }
-    },
-    "code_chat_models": {
-        "gpt-4o": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4o-2024-05-13",
-                "gpt-4o-2024-08-06",
-                "openai/gpt-4o",
-                "chatgpt-4o"
-            ]
-        },
-        "gpt-4o-mini": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4o-mini-2024-07-18"
-            ]
-        },
-        "gpt-4.1": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4.1-2025-04-14"
-            ]
-        },
-        "gpt-4.1-mini": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": [
-                "gpt-4.1-nano",
-                "gpt-4.1-mini-2025-04-14",
-                "gpt-4.1-nano-2025-04-14"
-            ]
-        },
-        "chatgpt-4o": {
-            "n_ctx": 128000,
-            "supports_tools": false,
-            "supports_multimodality": true,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            },
-            "similar_models": []
-        },
-        "o1": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_reasoning": "openai",
-            "supports_boost_reasoning": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            }
-        },
-        "o1-mini": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_reasoning": "openai",
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            }
-        },
-        "o3-mini": {
-            "n_ctx": 200000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_agent": true,
-            "supports_reasoning": "openai",
-            "supports_boost_reasoning": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {
-                }
-            }
-        },
-        "claude-instant-1.2": {
-            "n_ctx": 8096,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-2.1",
-                "claude-3-haiku",
-                "claude-3-opus",
-                "claude-3-sonnet"
-            ]
-        },
-        "claude-3-5-sonnet": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-3-5-sonnet-20240620"
-            ]
-        },
-        "claude-3-5-sonnet-20241022": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_clicks": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "claude-3-5-haiku": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-3-5-haiku-20241022"
-            ]
-        },
-        "claude-3-7-sonnet": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_clicks": true,
-            "supports_agent": true,
-            "supports_reasoning": "anthropic",
-            "supports_boost_reasoning": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "claude-3-7-sonnet-20250219"
-            ]
-        },
-        "gemini-2.0-flash-exp": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-1.5-flash",
-                "gemini-1.5-flash-8b"
-            ]
-        },
-        "gemini-1.5-pro": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "gemini-2.0-exp-advanced",
-                "gemini-2.5-pro"
-            ]
-        },
-        "llama3/8b/instruct": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "<|start_header_id|>system<|end_header_id|>\n\n",
-                    "keyword_user": "<|start_header_id|>user<|end_header_id|>\n\n",
-                    "keyword_assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n",
-                    "eot": "<|eot_id|>",
-                    "stop_list": [
-                        "<|eot_id|>"
-                    ]
-                }
-            },
-            "similar_models": [
-                "llama3/8b/instruct/neuron",
-                "meta-llama/llama-3.1-8b-instruct",
-                "llama3.1/8b/instruct",
-                "llama3.2/3b/instruct",
-                "llama3.2/1b/instruct"
-            ]
-        },
-        "deepseek-coder/6.7b/instruct-finetune/vllm": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "llama3/8b/instruct/vllm": {
-            "n_ctx": 8192,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "llama3.1/8b/instruct/vllm"
-            ]
-        },
-        "llama3.2/1b/instruct/vllm": {
-            "n_ctx": 16384,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "llama3.2/3b/instruct/vllm",
-                "llama3.3/70b/instruct/vllm"
-            ]
-        },
-        "mistral/24b/instruct/vllm": {
-            "n_ctx": 16384,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-            ]
-        },
-        "qwen2.5/coder/1.5b/instruct/vllm": {
-            "n_ctx": 32768,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "qwen2.5/coder/3b/instruct/vllm",
-                "qwen2.5/coder/7b/instruct/vllm",
-                "qwen2.5/coder/14b/instruct/vllm",
-                "qwen2.5/coder/32b/instruct/vllm"
-            ]
-        },
-        "qwen2.5/7b/instruct/vllm": {
-            "n_ctx": 32768,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "qwen2.5/14b/instruct/vllm",
-                "qwen2.5/32b/instruct/vllm"
-            ]
-        },
-        "qwen-qwq/32b/vllm": {
-            "n_ctx": 32768,
-            "supports_tools": true,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "qwen-qwq/32b/awq/vllm"
-            ]
-        },
-        "wizardlm/7b": {
-            "n_ctx": 2048,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "\nUSER: ",
-                    "keyword_assistant": "\nASSISTANT: ",
-                    "eot": "",
-                    "stop_list": ["\n\n"]
-                }
-            },
-            "similar_models": [
-                "wizardlm/13b",
-                "wizardlm/30b"
-            ]
-        },
-        "magicoder/6.7b": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "\n@@ Instruction\n",
-                    "keyword_assistant": "\n@@ Response\n",
-                    "stop_list": [],
-                    "eot": "<|EOT|>"
-                }
-            }
-        },
-        "mistral/7b/instruct-v0.1": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "[INST] ",
-                    "keyword_assistant": "[/INST]\n",
-                    "stop_list": [],
-                    "eot": ""
-                }
-            },
-            "similar_models": [
-                "mixtral/8x7b/instruct-v0.1"
-            ]
-        },
-        "phind/34b/v2": {
-            "n_ctx": 4095,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "### System Prompt\n",
-                    "keyword_user": "\n### User Message\n",
-                    "keyword_assistant": "\n### Assistant\n",
-                    "stop_list": [],
-                    "eot": ""
-                }
-            }
-        },
-        "deepseek-coder/6.7b/instruct": {
-            "n_ctx": 4096,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "",
-                    "keyword_user": "### Instruction:\n",
-                    "keyword_assistant": "### Response:\n",
-                    "stop_list": [],
-                    "eot": "<|EOT|>"
-                }
-            },
-            "similar_models": [
-                "deepseek-coder/33b/instruct",
-                "deepseek-coder/6.7b/instruct-finetune"
-            ]
-        },
-        "groq-llama-3.1-8b": {
-            "n_ctx": 128000,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "groq-llama-3.1-70b",
-                "groq-llama-3.2-1b",
-                "groq-llama-3.2-3b",
-                "groq-llama-3.2-11b-vision",
-                "groq-llama-3.2-90b-vision"
-            ]
-        },
-        "cerebras-llama3.1-8b": {
-            "n_ctx": 8192,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "cerebras-llama3.1-70b"
-            ]
-        },
-        "grok-beta": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-vision-beta": {
-            "n_ctx": 8192,
-            "supports_tools": false,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-2-vision-1212": {
-            "n_ctx": 32000,
-            "supports_tools": true,
-            "supports_multimodality": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-2-1212": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "grok-2": {
-            "n_ctx": 128000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "deepseek-chat": {
-            "n_ctx": 64000,
-            "supports_tools": true,
-            "supports_multimodality": false,
-            "supports_agent": true,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "deepseek-reasoner": {
-            "n_ctx": 64000,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_reasoning": "deepseek",
-            "default_temperature": 0.6,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            }
-        },
-        "qwen2.5/coder/0.5b/instruct": {
-            "n_ctx": 8192,
-            "supports_tools": false,
-            "supports_multimodality": false,
-            "supports_scratchpads": {
-                "CHAT-GENERIC": {
-                    "token_bos": "",
-                    "token_esc": "",
-                    "keyword_system": "<|im_start|>system\n",
-                    "keyword_user": "<|im_start|>user\n",
-                    "keyword_assistant": "<|im_start|>assistant\n",
-                    "eot": "<|im_end|>",
-                    "stop_list": [
-                        "<|im_end|>"
-                    ]
-                }
-            },
-            "similar_models": [
-                "qwen2.5/coder/1.5b/instruct",
-                "qwen2.5/coder/3b/instruct",
-                "qwen2.5/coder/7b/instruct/gptq8bit",
-                "qwen2.5/coder/7b/instruct",
-                "qwen2.5/coder/14b/instruct/gptq8bit",
-                "qwen2.5/coder/14b/instruct",
-                "qwen2.5/coder/32b/instruct/gptq8bit",
-                "qwen2.5/coder/32b/instruct"
-            ]
-        },
-        "deepseek-r1-distill/1.5b/vllm": {
-            "n_ctx": 32768,
-            "supports_reasoning": "deepseek",
-            "default_temperature": 0.6,
-            "supports_scratchpads": {
-                "PASSTHROUGH": {}
-            },
-            "similar_models": [
-                "deepseek-r1-distill/7b/vllm",
-                "deepseek-r1-distill/8b/vllm",
-                "deepseek-r1-distill/14b/vllm",
-                "deepseek-r1-distill/32b/vllm",
-                "deepseek-r1-distill/70b/vllm"
-            ]
-        }
-    },
-    "tokenizer_rewrite_path": {
-        "Refact/1.6B": "smallcloudai/Refact-1_6B-fim",
-        "starcoder2/3b": "bigcode/starcoder2-3b",
-
-        "text-embedding-3-small": "Xenova/text-embedding-ada-002",
-        "gpt-4o":                 "Xenova/gpt-4o",
-        "gpt-4o-2024-05-13":      "Xenova/gpt-4o",
-        "gpt-4o-2024-08-06":      "Xenova/gpt-4o",
-        "gpt-4o-mini":            "Xenova/gpt-4o",
-        "gpt-4o-mini-2024-07-18": "Xenova/gpt-4o",
-        "o1":                     "Xenova/gpt-4o",
-        "o1-mini":                "Xenova/gpt-4o",
-        "o3-mini":                "Xenova/gpt-4o",
-        "openai/gpt-4o":          "Xenova/gpt-4o",
-        "chatgpt-4o":             "Xenova/gpt-4o",
-
-        "gpt-4.1":                    "Xenova/gpt-4o",
-        "gpt-4.1-2025-04-14":         "Xenova/gpt-4o",
-        "gpt-4.1-mini":               "Xenova/gpt-4o",
-        "gpt-4.1-mini-2025-04-14":    "Xenova/gpt-4o",
-        "gpt-4.1-nano":               "Xenova/gpt-4o",
-        "gpt-4.1-nano-2025-04-14":    "Xenova/gpt-4o",
-
-        "claude-3-5-sonnet":          "Xenova/claude-tokenizer",
-        "claude-3-5-haiku":           "Xenova/claude-tokenizer",
-        "claude-3-5-haiku-20241022":  "Xenova/claude-tokenizer",
-        "claude-3-5-sonnet-20240620": "Xenova/claude-tokenizer",
-        "claude-3-5-sonnet-20241022": "Xenova/claude-tokenizer",
-        "claude-3-7-sonnet":          "Xenova/claude-tokenizer",
-
-        "groq-llama-3.1-8b":      "Xenova/Meta-Llama-3.1-Tokenizer",
-        "cerebras-llama3.1-8b":     "Xenova/Meta-Llama-3.1-Tokenizer",
-
-        "grok-beta": "Xenova/grok-1-tokenizer",
-        "grok-vision-beta": "Xenova/grok-1-tokenizer",
-        "grok-2": "Xenova/grok-1-tokenizer",
-        "grok-2-vision-1212": "Xenova/grok-1-tokenizer",
-        "grok-2-1212": "Xenova/grok-1-tokenizer",
-
-        "gemini-2.0-flash-exp": "Xenova/gemma2-tokenizer",
-        "gemini-1.5-flash": "Xenova/gemma2-tokenizer",
-        "gemini-1.5-flash-8b": "Xenova/gemma2-tokenizer",
-        "gemini-1.5-pro": "Xenova/gemma2-tokenizer",
-        "gemini-2.0-exp-advanced": "Xenova/gemma2-tokenizer",
-        "gemini-2.5-pro": "Xenova/gemma2-tokenizer",
-
-        "deepseek-chat":     "deepseek-ai/DeepSeek-V3",
-        "deepseek-reasoner": "deepseek-ai/DeepSeek-R1"
-    }
-}
-"####;
-
-// gemini and gemma bear the same tokenizer
-// according to https://medium.com/google-cloud/a-gemini-and-gemma-tokenizer-in-java-e18831ac9677
-// downloadable tokenizer.json does not exist for gemini, the only precise way is to use web-requests
-
-
-// XAI WARNING: tokenizer is non-precise as there's no publicly available tokenizer for these models
-// XAI says that for exact same model different tokenizers could be used
-// therefore, using tokenizer for grok-1 which may or may not provide proximate enough results
diff --git a/refact-agent/engine/src/lsp.rs b/refact-agent/engine/src/lsp.rs
index 9144fbfde..1035bbec1 100644
--- a/refact-agent/engine/src/lsp.rs
+++ b/refact-agent/engine/src/lsp.rs
@@ -143,7 +143,6 @@ impl LspBackend {
                 ..Default::default()
             },
             model: "".to_string(),
-            scratchpad: "".to_string(),
             stream: false,
             no_cache: false,
             use_ast: false,
diff --git a/refact-agent/engine/src/main.rs b/refact-agent/engine/src/main.rs
index 89335b2a1..27f628fb5 100644
--- a/refact-agent/engine/src/main.rs
+++ b/refact-agent/engine/src/main.rs
@@ -48,8 +48,7 @@ mod at_commands;
 mod tools;
 mod postprocessing;
 mod completion_cache;
-mod cached_tokenizers;
-mod known_models;
+mod tokens;
 mod scratchpad_abstract;
 mod scratchpads;
 
@@ -152,12 +151,7 @@ async fn main() {
         let mut error_log = Vec::new();
         let cust = load_customization(gcx.clone(), false, &mut error_log).await;
         for e in error_log.iter() {
-            eprintln!(
-                "{}:{} {:?}",
-                crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-                e.error_line,
-                e.error_msg,
-            );
+            eprintln!("{e}");
         }
         println!("{}", serde_json::to_string_pretty(&cust).unwrap());
         std::process::exit(0);
diff --git a/refact-agent/engine/src/postprocessing/pp_context_files.rs b/refact-agent/engine/src/postprocessing/pp_context_files.rs
index a8a15460c..744c55561 100644
--- a/refact-agent/engine/src/postprocessing/pp_context_files.rs
+++ b/refact-agent/engine/src/postprocessing/pp_context_files.rs
@@ -1,5 +1,4 @@
 use std::sync::Arc;
-use std::sync::RwLock;
 use std::collections::HashSet;
 use tracing::{info, warn};
 use tokenizers::Tokenizer;
@@ -12,7 +11,7 @@ use crate::ast::ast_structs::AstDefinition;
 use crate::global_context::GlobalContext;
 use crate::nicer_logs::{first_n_chars, last_n_chars};
 use crate::postprocessing::pp_utils::{color_with_gradient_type, colorize_comments_up, colorize_if_more_useful, colorize_minus_one, colorize_parentof, downgrade_lines_if_subsymbol, pp_ast_markup_files};
-use crate::scratchpads::scratchpad_utils::count_tokens;
+use crate::tokens::count_text_tokens_with_fallback;
 
 
 pub const RESERVE_FOR_QUESTION_AND_FOLLOWUP: usize = 1024;  // tokens
@@ -236,7 +235,7 @@ pub async fn pp_color_lines(
 
 async fn pp_limit_and_merge(
     lines_in_files: &mut IndexMap<PathBuf, Vec<FileLine>>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     tokens_limit: usize,
     single_file_mode: bool,
     settings: &PostprocessSettings,
@@ -259,7 +258,7 @@ async fn pp_limit_and_merge(
         if !line_ref.take_ignoring_floor && line_ref.useful <= settings.take_floor {
             continue;
         }
-        let mut ntokens = count_tokens(&tokenizer.read().unwrap(), &line_ref.line_content);
+        let mut ntokens = count_text_tokens_with_fallback(tokenizer.clone(), &line_ref.line_content);
 
         if !files_mentioned_set.contains(&line_ref.file_ref.cpath) {
             if files_mentioned_set.len() >= settings.max_files_n {
@@ -268,7 +267,7 @@ async fn pp_limit_and_merge(
             files_mentioned_set.insert(line_ref.file_ref.cpath.clone());
             files_mentioned_sequence.push(line_ref.file_ref.cpath.clone());
             if !single_file_mode {
-                ntokens += count_tokens(&tokenizer.read().unwrap(), &line_ref.file_ref.cpath.as_str());
+                ntokens += count_text_tokens_with_fallback(tokenizer.clone(), &line_ref.file_ref.cpath.as_str());
                 ntokens += 5;  // a margin for any overhead: file_sep, new line, etc
             }
         }
@@ -350,7 +349,7 @@ async fn pp_limit_and_merge(
 pub async fn postprocess_context_files(
     gcx: Arc<ARwLock<GlobalContext>>,
     context_file_vec: &mut Vec<ContextFile>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     tokens_limit: usize,
     single_file_mode: bool,
     settings: &PostprocessSettings,
diff --git a/refact-agent/engine/src/postprocessing/pp_plain_text.rs b/refact-agent/engine/src/postprocessing/pp_plain_text.rs
index e4cbc6b9a..d4b258244 100644
--- a/refact-agent/engine/src/postprocessing/pp_plain_text.rs
+++ b/refact-agent/engine/src/postprocessing/pp_plain_text.rs
@@ -1,21 +1,20 @@
-use std::sync::{Arc, RwLockReadGuard};
-use std::sync::RwLock;
+use std::sync::Arc;
 use tokenizers::Tokenizer;
 
 use crate::call_validation::{ChatContent, ChatMessage};
 use crate::scratchpads::multimodality::MultimodalElement;
-use crate::scratchpads::scratchpad_utils::count_tokens;
+use crate::tokens::count_text_tokens_with_fallback;
 
 
 fn limit_text_content(
-    tokenizer_guard: &RwLockReadGuard<Tokenizer>,
+    tokenizer: Option<Arc<Tokenizer>>,
     text: &String,
     tok_used: &mut usize,
     tok_per_m: usize,
 ) -> String {
     let mut new_text_lines = vec![];
     for line in text.lines() {
-        let line_tokens = count_tokens(tokenizer_guard, &line);
+        let line_tokens = count_text_tokens_with_fallback(tokenizer.clone(), &line);
         if tok_used.clone() + line_tokens > tok_per_m {
             if new_text_lines.is_empty() {
                 new_text_lines.push("No content: tokens limit reached");
@@ -31,7 +30,7 @@ fn limit_text_content(
 
 pub async fn postprocess_plain_text(
     plain_text_messages: Vec<&ChatMessage>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     tokens_limit: usize,
     style: &Option<String>,
 ) -> (Vec<ChatMessage>, usize) {
@@ -45,14 +44,13 @@ pub async fn postprocess_plain_text(
     let mut tok_per_m = tokens_limit / messages_sorted.len();
     let mut new_messages = vec![];
 
-    let tokenizer_guard = tokenizer.read().unwrap();
     for (idx, msg) in messages_sorted.iter().cloned().enumerate() {
         let mut tok_used = 0;
         let mut m_cloned = msg.clone();
         
         m_cloned.content = match &msg.content {
             ChatContent::SimpleText(text) => {
-                let new_content = limit_text_content(&tokenizer_guard, text, &mut tok_used, tok_per_m);
+                let new_content = limit_text_content(tokenizer.clone(), text, &mut tok_used, tok_per_m);
                 ChatContent::SimpleText(new_content)
             },
             ChatContent::Multimodal(elements) => {
@@ -61,7 +59,7 @@ pub async fn postprocess_plain_text(
                 for element in elements {
                     if element.is_text() {
                         let mut el_cloned = element.clone();
-                        el_cloned.m_content = limit_text_content(&tokenizer_guard, &el_cloned.m_content, &mut tok_used, tok_per_m);
+                        el_cloned.m_content = limit_text_content(tokenizer.clone(), &el_cloned.m_content, &mut tok_used, tok_per_m);
                         new_content.push(el_cloned)
                     } else if element.is_image() {
                         let tokens = element.count_tokens(None, style).unwrap() as usize;
diff --git a/refact-agent/engine/src/restream.rs b/refact-agent/engine/src/restream.rs
index 60bfded17..ce0bf3bc2 100644
--- a/refact-agent/engine/src/restream.rs
+++ b/refact-agent/engine/src/restream.rs
@@ -1,6 +1,5 @@
-use std::sync::{Arc, RwLock as StdRwLock};
+use std::sync::Arc;
 use tokio::sync::Mutex as AMutex;
-use tokio::sync::RwLock as ARwLock;
 use tokio::sync::mpsc;
 use async_stream::stream;
 use futures::StreamExt;
@@ -9,124 +8,57 @@ use reqwest_eventsource::Event;
 use reqwest_eventsource::Error as REError;
 use serde_json::{json, Value};
 use tracing::info;
+use uuid;
 
 use crate::call_validation::{ChatMeta, SamplingParameters};
+use crate::caps::BaseModelRecord;
 use crate::custom_error::ScratchError;
 use crate::nicer_logs;
 use crate::scratchpad_abstract::{FinishReason, ScratchpadAbstract};
 use crate::telemetry::telemetry_structs;
 use crate::at_commands::at_commands::AtCommandsContext;
-use crate::caps::get_api_key;
-
-
-async fn _get_endpoint_and_stuff_from_model_name(
-    gcx: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    model_name: String,
-) -> (String, String, String, String)
-{
-    let (
-        custom_apikey,
-        mut endpoint_style,
-        custom_endpoint_style,
-        mut endpoint_template,
-        custom_endpoint_template,
-        endpoint_chat_passthrough,
-    ) = {
-        let caps_locked = caps.read().unwrap();
-        if caps_locked.code_chat_models.contains_key(&model_name) {
-            (
-                caps_locked.chat_apikey.clone(),
-                caps_locked.endpoint_style.clone(),      // abstract
-                caps_locked.chat_endpoint_style.clone(), // chat-specific
-                caps_locked.endpoint_template.clone(),   // abstract
-                caps_locked.chat_endpoint.clone(),       // chat-specific
-                caps_locked.endpoint_chat_passthrough.clone(),
-            )
-        } else {
-            (
-                caps_locked.completion_apikey.clone(),
-                caps_locked.endpoint_style.clone(),             // abstract
-                caps_locked.completion_endpoint_style.clone(),  // completion-specific
-                caps_locked.endpoint_template.clone(),          // abstract
-                caps_locked.completion_endpoint.clone(),        // completion-specific
-                "".to_string(),
-            )
-        }
-    };
-    let api_key = get_api_key(gcx, custom_apikey).await;
-    if !custom_endpoint_style.is_empty() {
-        endpoint_style = custom_endpoint_style;
-    }
-    if !custom_endpoint_template.is_empty() {
-        endpoint_template = custom_endpoint_template;
-    }
-    (
-        api_key,
-        endpoint_template,
-        endpoint_style,
-        endpoint_chat_passthrough,
-    )
-}
+
 
 pub async fn scratchpad_interaction_not_stream_json(
     ccx: Arc<AMutex<AtCommandsContext>>,
     scratchpad: &mut Box<dyn ScratchpadAbstract>,
     scope: String,
     prompt: &str,
-    model_name: String,
+    model_rec: &BaseModelRecord,
     parameters: &SamplingParameters,  // includes n
     only_deterministic_messages: bool,
     meta: Option<ChatMeta>
 ) -> Result<Value, ScratchError> {
     let t2 = std::time::SystemTime::now();
     let gcx = ccx.lock().await.global_context.clone();
-    let (client, caps, tele_storage, slowdown_arc) = {
+    let (client, tele_storage, slowdown_arc) = {
         let gcx_locked = gcx.write().await;
-        let caps = gcx_locked.caps.clone()
-            .ok_or(ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, "No caps available".to_string()))?;
         (
             gcx_locked.http_client.clone(),
-            caps,
             gcx_locked.telemetry.clone(),
             gcx_locked.http_client_slowdown.clone()
         )
     };
-    let (
-        bearer,
-        endpoint_template,
-        endpoint_style,
-        endpoint_chat_passthrough,
-    ) = _get_endpoint_and_stuff_from_model_name(gcx.clone(), caps.clone(), model_name.clone()).await;
 
     let mut save_url: String = String::new();
     let _ = slowdown_arc.acquire().await;
-    let metadata_supported = crate::global_context::is_metadata_supported(gcx.clone()).await;
     let mut model_says = if only_deterministic_messages {
         save_url = "only-det-messages".to_string();
         Ok(Value::Object(serde_json::Map::new()))
-    } else if endpoint_style == "hf" {
+    } else if model_rec.endpoint_style == "hf" {
         crate::forward_to_hf_endpoint::forward_to_hf_style_endpoint(
-            &mut save_url,
-            bearer.clone(),
-            &model_name,
-            &prompt,
+            &model_rec,
+            prompt,
             &client,
-            &endpoint_template,
             &parameters,
             meta
         ).await
     } else {
         crate::forward_to_openai_endpoint::forward_to_openai_style_endpoint(
-            &mut save_url,
-            bearer.clone(),
-            &model_name,
-            &prompt,
+            &model_rec,
+            prompt,
             &client,
-            &endpoint_template,
-            &endpoint_chat_passthrough,
-            &parameters,  // includes n
-            metadata_supported,
+            &parameters,
             meta
         ).await
     }.map_err(|e| {
@@ -138,6 +70,8 @@ pub async fn scratchpad_interaction_not_stream_json(
             ));
         ScratchError::new_but_skip_telemetry(StatusCode::INTERNAL_SERVER_ERROR, format!("forward_to_endpoint: {}", e))
     })?;
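+    // Some providers (e.g. Gemini) return tool calls without "id" or "index"; fill them in before the response is used downstream.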
+    generate_id_and_index_for_tool_calls_if_missing(&mut model_says);
+    
     tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
         save_url.clone(),
         scope.clone(),
@@ -253,7 +187,7 @@ pub async fn scratchpad_interaction_not_stream(
     ccx: Arc<AMutex<AtCommandsContext>>,
     scratchpad: &mut Box<dyn ScratchpadAbstract>,
     scope: String,
-    model_name: String,
+    model_rec: &BaseModelRecord,
     parameters: &mut SamplingParameters,
     only_deterministic_messages: bool,
     meta: Option<ChatMeta>
@@ -273,7 +207,7 @@ pub async fn scratchpad_interaction_not_stream(
         scratchpad,
         scope,
         prompt.as_str(),
-        model_name,
+        &model_rec,
         parameters,
         only_deterministic_messages,
         meta
@@ -296,34 +230,26 @@ pub async fn scratchpad_interaction_stream(
     ccx: Arc<AMutex<AtCommandsContext>>,
     mut scratchpad: Box<dyn ScratchpadAbstract>,
     scope: String,
-    mut model_name: String,
+    mut model_rec: BaseModelRecord,
     parameters: SamplingParameters,
     only_deterministic_messages: bool,
     meta: Option<ChatMeta>
 ) -> Result<Response<Body>, ScratchError> {
-    let t1 = std::time::SystemTime::now();
+    let t1: std::time::SystemTime = std::time::SystemTime::now();
     let evstream = stream! {
         let my_scratchpad: &mut Box<dyn ScratchpadAbstract> = &mut scratchpad;
         let mut my_parameters = parameters.clone();
         let my_ccx = ccx.clone();
 
         let gcx = ccx.lock().await.global_context.clone();
-        let (client, caps, tele_storage, slowdown_arc) = {
+        let (client, tele_storage, slowdown_arc) = {
             let gcx_locked = gcx.write().await;
-            let caps = gcx_locked.caps.clone().unwrap();
             (
                 gcx_locked.http_client.clone(),
-                caps,
                 gcx_locked.telemetry.clone(),
                 gcx_locked.http_client_slowdown.clone()
             )
         };
-        let (
-            bearer,
-            endpoint_template,
-            endpoint_style,
-            endpoint_chat_passthrough,
-        ) = _get_endpoint_and_stuff_from_model_name(gcx.clone(), caps.clone(), model_name.clone()).await;
 
         let t0 = std::time::Instant::now();
         let mut prompt = String::new();
@@ -376,7 +302,6 @@ pub async fn scratchpad_interaction_stream(
         }
         info!("scratchpad_interaction_stream prompt {:?}", t0.elapsed());
 
-        let mut save_url: String = String::new();
         let _ = slowdown_arc.acquire().await;
         loop {
             let value_maybe = my_scratchpad.response_spontaneous();
@@ -398,29 +323,20 @@ pub async fn scratchpad_interaction_stream(
                 break;
             }
             // info!("prompt: {:?}", prompt);
-            let metadata_supported = crate::global_context::is_metadata_supported(gcx.clone()).await;
-            let event_source_maybe = if endpoint_style == "hf" {
+            let event_source_maybe = if model_rec.endpoint_style == "hf" {
                 crate::forward_to_hf_endpoint::forward_to_hf_style_endpoint_streaming(
-                    &mut save_url,
-                    bearer.clone(),
-                    &model_name,
-                    prompt.as_str(),
+                    &model_rec,
+                    &prompt,
                     &client,
-                    &endpoint_template,
                     &my_parameters,
                     meta
                 ).await
             } else {
                 crate::forward_to_openai_endpoint::forward_to_openai_style_endpoint_streaming(
-                    &mut save_url,
-                    bearer.clone(),
-                    &model_name,
-                    prompt.as_str(),
+                    &model_rec,
+                    &prompt,
                     &client,
-                    &endpoint_template,
-                    &endpoint_chat_passthrough,
                     &my_parameters,
-                    metadata_supported,
                     meta
                 ).await
             };
@@ -429,15 +345,15 @@ pub async fn scratchpad_interaction_stream(
                 Err(e) => {
                     let e_str = format!("forward_to_endpoint: {:?}", e);
                     tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
-                        save_url.clone(),
+                        model_rec.endpoint.clone(),
                         scope.clone(),
                         false,
                         e_str.to_string(),
                     ));
                     tracing::error!(e_str);
-                    let value_str = serde_json::to_string(&json!({"detail": e_str})).unwrap();
+                    let value_str = format!("data: {}\n\n", serde_json::to_string(&json!({"detail": e_str})).unwrap());
                     yield Result::<_, String>::Ok(value_str);
-                    break;
+                    return;
                 }
             };
             let mut was_correct_output_even_if_error = false;
@@ -451,12 +367,13 @@ pub async fn scratchpad_interaction_stream(
                         if message.data.starts_with("[DONE]") {
                             break;
                         }
-                        let json = serde_json::from_str::<serde_json::Value>(&message.data).unwrap();
+                        let mut json = serde_json::from_str::<serde_json::Value>(&message.data).unwrap();
+                        generate_id_and_index_for_tool_calls_if_missing(&mut json);
                         crate::global_context::look_for_piggyback_fields(gcx.clone(), &json).await;
                         match _push_streaming_json_into_scratchpad(
                             my_scratchpad,
                             &json,
-                            &mut model_name,
+                            &mut model_rec.name,
                             &mut was_correct_output_even_if_error,
                         ) {
                             Ok((mut value, finish_reason)) => {
@@ -503,13 +420,13 @@ pub async fn scratchpad_interaction_stream(
                         tracing::error!("restream error: {}\n", problem_str);
                         {
                             tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
-                                save_url.clone(),
+                                model_rec.endpoint.clone(),
                                 scope.clone(),
                                 false,
                                 problem_str.clone(),
                             ));
                         }
-                        yield Result::<_, String>::Ok(serde_json::to_string(&json!({"detail": problem_str})).unwrap());
+                        yield Result::<_, String>::Ok(format!("data: {}\n\n", serde_json::to_string(&json!({"detail": problem_str})).unwrap()));
                         event_source.close();
                         return;
                     },
@@ -518,7 +435,7 @@ pub async fn scratchpad_interaction_stream(
 
             let mut value = my_scratchpad.streaming_finished(last_finish_reason)?;
             value["created"] = json!(t1.duration_since(std::time::UNIX_EPOCH).unwrap().as_secs_f64());
-            value["model"] = json!(model_name.clone());
+            value["model"] = json!(model_rec.name.clone());
             let value_str = format!("data: {}\n\n", serde_json::to_string(&value).unwrap());
             info!("yield final: {:?}", value_str);
             yield Result::<_, String>::Ok(value_str);
@@ -527,7 +444,7 @@ pub async fn scratchpad_interaction_stream(
         info!("yield: [DONE]");
         yield Result::<_, String>::Ok("data: [DONE]\n\n".to_string());
         tele_storage.write().unwrap().tele_net.push(telemetry_structs::TelemetryNetwork::new(
-            save_url.clone(),
+            model_rec.endpoint.clone(),
             scope.clone(),
             true,
             "".to_string(),
@@ -581,6 +498,44 @@ pub fn try_insert_usage(msg_value: &mut serde_json::Value) -> bool {
     return false;
 }
 
+/// Generates `id` and `index` for tool calls that are missing them; some providers (e.g. Gemini) omit these fields
+fn generate_id_and_index_for_tool_calls_if_missing(value: &mut serde_json::Value) {
+    fn process_tool_call(tool_call: &mut serde_json::Value, idx: usize) {
+        if let Some(id) = tool_call.get_mut("id") {
+            if id.is_string() && id.as_str().unwrap_or("").is_empty() {
+                let uuid = uuid::Uuid::new_v4().to_string().replace("-", "");
+                *id = json!(format!("call_{uuid}"));
+                tracing::info!("Generated UUID for empty tool call ID: call_{}", uuid);
+            }
+        }
+        if tool_call.get("index").is_none() {
+            tool_call["index"] = json!(idx);
+        }
+    }
+
+    if let Some(tool_calls) = value.get_mut("tool_calls").and_then(|tc| tc.as_array_mut()) {
+        for (i, tool_call) in tool_calls.iter_mut().enumerate() {
+            process_tool_call(tool_call, i);
+        }
+    }
+    
+    if let Some(choices) = value.get_mut("choices").and_then(|c| c.as_array_mut()) {
+        for choice in choices {
+            for field in ["delta", "message"] {
+                if let Some(tool_calls) = choice.get_mut(field)
+                    .and_then(|v| v.get_mut("tool_calls"))
+                    .and_then(|tc| tc.as_array_mut()) 
+                {
+                    for (i, tool_call) in tool_calls.iter_mut().enumerate() {
+                        process_tool_call(tool_call, i);
+                    }
+                }
+            }
+        }
+    }
+}
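+// Illustrative sketch of the normalization (hypothetical chunk, not a real provider response):
+//   {"choices":[{"delta":{"tool_calls":[{"id":"","function":{"name":"f","arguments":"{}"}}]}}]}
+// becomes
+//   {"choices":[{"delta":{"tool_calls":[{"id":"call_<uuid>","index":0,"function":{"name":"f","arguments":"{}"}}]}}]}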
+
+
 fn _push_streaming_json_into_scratchpad(
     scratch: &mut Box<dyn ScratchpadAbstract>,
     json: &serde_json::Value,
@@ -624,6 +579,8 @@ fn _push_streaming_json_into_scratchpad(
         }
         value["model"] = json!(model_name.clone());
         Ok((value, finish_reason))
+    } else if json.get("type").and_then(|t| t.as_str()) == Some("ping") {
+        Ok((serde_json::value::Value::Null, FinishReason::None))
     } else if let Some(err) = json.get("error") {
         Err(format!("{}", err))
     } else if let Some(msg) = json.get("human_readable_message") {
diff --git a/refact-agent/engine/src/scratchpad_abstract.rs b/refact-agent/engine/src/scratchpad_abstract.rs
index 0ae9ef1fc..cfa463d3c 100644
--- a/refact-agent/engine/src/scratchpad_abstract.rs
+++ b/refact-agent/engine/src/scratchpad_abstract.rs
@@ -1,6 +1,5 @@
 use serde_json;
 use std::sync::Arc;
-use std::sync::RwLock;
 use tokio::sync::Mutex as AMutex;
 use tokenizers::Tokenizer;
 use async_trait::async_trait;
@@ -8,6 +7,7 @@ use serde_json::Value;
 
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::SamplingParameters;
+use crate::tokens::count_text_tokens;
 
 use tracing::warn;
 
@@ -120,7 +120,7 @@ pub trait ScratchpadAbstract: Send {
 // aggregate this struct to make scratchpad implementation easier
 #[derive(Debug, Clone)]
 pub struct HasTokenizerAndEot {
-    pub tokenizer: Arc<RwLock<Tokenizer>>,
+    pub tokenizer: Option<Arc<Tokenizer>>,
     pub eot: String,
     pub eos: String,
     pub context_format: String,
@@ -128,7 +128,7 @@ pub struct HasTokenizerAndEot {
 }
 
 impl HasTokenizerAndEot {
-    pub fn new(tokenizer: Arc<RwLock<Tokenizer>>) -> Self {
+    pub fn new(tokenizer: Option<Arc<Tokenizer>>) -> Self {
         HasTokenizerAndEot { tokenizer, eot: String::new(), eos: String::new(), context_format: String::new(), rag_ratio: 0.5}
     }
 
@@ -136,24 +136,23 @@ impl HasTokenizerAndEot {
         &self,
         text: &str,
     ) -> Result<i32, String> {
-        let tokenizer = self.tokenizer.write().unwrap();
-        let tokens = tokenizer.encode(text, false).map_err(|err| {
-            return format!("Encoding error: {}", err);
-        })?;
-        Ok(tokens.len() as i32)
+        count_text_tokens(self.tokenizer.clone(), text).map(|t| t as i32)
     }
 
     pub fn assert_one_token(
         &self,
         text: &str
     ) -> Result<(), String> {
-        let tokenizer = self.tokenizer.write().unwrap();
-        let tokens = tokenizer.encode(text, false).map_err(|err| {
-            format!("assert_one_token: {}", err)
-        })?;
-        if tokens.len() != 1 {
-            return Err(format!("assert_one_token: expected 1 token for \"{}\", got {}", text, tokens.len()));
+        if self.tokenizer.is_none() {
+            return Err("assert_one_token: no tokenizer".to_string());
+        }
+
+        let token_count = count_text_tokens(self.tokenizer.clone(), text)?;
+
+        if token_count != 1 {
+            Err(format!("assert_one_token: expected 1 token for \"{text}\", got {token_count}"))
+        } else {
+            Ok(())
         }
-        Ok(())
     }
 }
diff --git a/refact-agent/engine/src/scratchpads/chat_generic.rs b/refact-agent/engine/src/scratchpads/chat_generic.rs
index c0e24056a..40c941471 100644
--- a/refact-agent/engine/src/scratchpads/chat_generic.rs
+++ b/refact-agent/engine/src/scratchpads/chat_generic.rs
@@ -1,5 +1,4 @@
 use std::sync::Arc;
-use std::sync::RwLock;
 
 use async_trait::async_trait;
 use serde_json::Value;
@@ -40,7 +39,7 @@ pub struct GenericChatScratchpad {
 
 impl GenericChatScratchpad {
     pub fn new(
-        tokenizer: Arc<RwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &ChatPost,
         messages: &Vec<ChatMessage>,
         prepend_system_prompt: bool,
diff --git a/refact-agent/engine/src/scratchpads/chat_passthrough.rs b/refact-agent/engine/src/scratchpads/chat_passthrough.rs
index 0af619f3d..8fbe8ccde 100644
--- a/refact-agent/engine/src/scratchpads/chat_passthrough.rs
+++ b/refact-agent/engine/src/scratchpads/chat_passthrough.rs
@@ -1,5 +1,4 @@
 use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
 use indexmap::IndexMap;
 use serde_json::{json, Value};
 use tokenizers::Tokenizer;
@@ -10,6 +9,7 @@ use tracing::info;
 use crate::at_commands::execute_at::{run_at_commands_locally, run_at_commands_remotely};
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{ChatMessage, ChatPost, ReasoningEffort, SamplingParameters};
+use crate::caps::resolve_chat_model;
 use crate::http::http_get_json;
 use crate::integrations::docker::docker_container_manager::docker_container_get_host_lsp_port_to_connect;
 use crate::scratchpad_abstract::{FinishReason, HasTokenizerAndEot, ScratchpadAbstract};
@@ -68,7 +68,7 @@ pub struct ChatPassthrough {
 
 impl ChatPassthrough {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &ChatPost,
         messages: &Vec<ChatMessage>,
         prepend_system_prompt: bool,
@@ -215,23 +215,17 @@ impl ScratchpadAbstract for ChatPassthrough {
             let gcx_locked = gcx.write().await;
             gcx_locked.caps.clone().unwrap()
         };
-        let model_record_mb = {
-            let caps_locked = caps.read().unwrap();
-            caps_locked.code_chat_models.get(&self.post.model).cloned()
-        };
+        let model_record_mb = resolve_chat_model(caps, &self.post.model).ok();
 
-        let supports_reasoning = if let Some(model_record) = model_record_mb.clone() {
-            !model_record.supports_reasoning.is_none()
-        } else {
-            false
-        };
+        let supports_reasoning = model_record_mb.as_ref()
+            .map_or(false, |m| m.supports_reasoning.is_some());
 
         let limited_adapted_msgs = if supports_reasoning {
-            let model_record = model_record_mb.unwrap();
+            let model_record = model_record_mb.clone().unwrap();
             _adapt_for_reasoning_models(
-                &limited_msgs,
+                limited_msgs,
                 sampling_parameters_to_patch,
-                model_record.supports_reasoning.unwrap(),
+                model_record.supports_reasoning.as_ref().unwrap().clone(),
                 model_record.default_temperature.clone(),
                 model_record.supports_boost_reasoning.clone(),
             )
@@ -239,7 +233,8 @@ impl ScratchpadAbstract for ChatPassthrough {
             limited_msgs
         };
 
-        let converted_messages = convert_messages_to_openai_format(limited_adapted_msgs, &style);
+        let model_id = model_record_mb.map(|m| m.base.id.clone()).unwrap_or_default();
+        let converted_messages = convert_messages_to_openai_format(limited_adapted_msgs, &style, &model_id);
         big_json["messages"] = json!(converted_messages);
         big_json["compression_strength"] = json!(compression_strength);
 
@@ -285,7 +280,7 @@ impl ScratchpadAbstract for ChatPassthrough {
 }
 
 fn _adapt_for_reasoning_models(
-    messages: &Vec<ChatMessage>,
+    messages: Vec<ChatMessage>,
     sampling_parameters: &mut SamplingParameters,
     supports_reasoning: String,
     default_temperature: Option<f32>,
@@ -299,8 +294,7 @@ fn _adapt_for_reasoning_models(
             sampling_parameters.temperature = default_temperature;
 
             // NOTE: OpenAI prefers user messages over system ones
-            messages.iter().map(|msg| {
-                let mut msg = msg.clone();
+            messages.into_iter().map(|mut msg| {
                 if msg.role == "system" {
                     msg.role = "user".to_string();
                 }
@@ -319,11 +313,11 @@ fn _adapt_for_reasoning_models(
                     "budget_tokens": budget_tokens,
                 }));
             }
-            messages.clone()
+            messages
         },
         _ => {
             sampling_parameters.temperature = default_temperature.clone();
-            messages.clone()
+            messages
         }
     }
 }
diff --git a/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs b/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs
index a03b2d268..aef5f3044 100644
--- a/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs
+++ b/refact-agent/engine/src/scratchpads/chat_utils_limit_history.rs
@@ -40,15 +40,15 @@ pub enum CompressionStrength {
 /// 
 /// # Arguments
 /// 
-/// * `model_name` - The name of the model (e.g., "claude-3-7-sonnet")
+/// * `model_id` - Provider-qualified model name (e.g., "Refact/claude-3-7-sonnet")
 /// 
 /// # Returns
 /// 
 /// A tuple containing (EXTRA_TOKENS_PER_MESSAGE, EXTRA_BUDGET_OFFSET_PERC)
-pub fn get_model_token_params(model_name: &str) -> (i32, f32) {
-    match model_name {
+pub fn get_model_token_params(model_id: &str) -> (i32, f32) {
+    match model_id {
         // Claude 3 Sonnet models need higher token overhead
-        "claude-3-7-sonnet" | "claude-3-5-sonnet" => (150, 0.2),
+        m if m.contains("claude-3-7-sonnet") || m.contains("claude-3-5-sonnet") => (150, 0.2),
         
         // Default values for all other models
         _ => (3, 0.0),
@@ -60,11 +60,11 @@ fn recalculate_token_limits(
     tools_description_tokens: i32,
     n_ctx: usize,
     max_new_tokens: usize,
-    model_name: &str,
+    model_id: &str,
 ) -> (i32, i32) {
     let occupied_tokens = token_counts.iter().sum::<i32>() + tools_description_tokens;
     
-    let (_, extra_budget_offset_perc) = get_model_token_params(model_name);
+    let (_, extra_budget_offset_perc) = get_model_token_params(model_id);
     
     let extra_budget = (n_ctx as f32 * extra_budget_offset_perc) as usize;
     let tokens_limit = n_ctx.saturating_sub(max_new_tokens).saturating_sub(extra_budget) as i32;
@@ -77,7 +77,7 @@ fn compress_message_at_index(
     token_counts: &mut Vec<i32>,
     token_cache: &mut TokenCountCache,
     index: usize,
-    model_name: &str,
+    model_id: &str,
 ) -> Result<i32, String> {
     let role = &mutable_messages[index].role;
     let new_summary = if role == "context_file" {
@@ -115,7 +115,7 @@ fn compress_message_at_index(
     
     mutable_messages[index].content = ChatContent::SimpleText(new_summary);
     token_cache.invalidate(&mutable_messages[index]);
-    let (extra_tokens_per_message, _) = get_model_token_params(model_name);
+    let (extra_tokens_per_message, _) = get_model_token_params(model_id);
     // Recalculate token usage after compression using the cache
     token_counts[index] = token_cache.get_token_count(&mutable_messages[index], t.tokenizer.clone(), extra_tokens_per_message)?;
     Ok(token_counts[index])
@@ -132,14 +132,14 @@ fn process_compression_stage(
     start_idx: usize,
     end_idx: usize,
     stage_name: &str,
-    model_name: &str,
+    model_id: &str,
     message_filter: impl Fn(usize, &ChatMessage, i32) -> bool,
     sort_by_size: bool,
 ) -> Result<(i32, i32, bool), String> {
     tracing::info!("n_ctx={n_ctx}, max_new_tokens={max_new_tokens}");
     tracing::info!("STAGE: {}", stage_name);
     let (mut occupied_tokens, tokens_limit) = 
-        recalculate_token_limits(token_counts, tools_description_tokens, n_ctx, max_new_tokens, model_name);
+        recalculate_token_limits(token_counts, tools_description_tokens, n_ctx, max_new_tokens, model_id);
     let mut budget_reached = false;
     let messages_len = mutable_messages.len();
     let end = std::cmp::min(end_idx, messages_len);
@@ -164,7 +164,7 @@ fn process_compression_stage(
     }
     
     for (i, original_tokens) in indices_to_process {
-        compress_message_at_index(t, mutable_messages, token_counts, token_cache, i, model_name)?;
+        compress_message_at_index(t, mutable_messages, token_counts, token_cache, i, model_id)?;
         let token_delta = token_counts[i] - original_tokens;
         occupied_tokens += token_delta;
         tracing::info!("Compressed message at index {}: token count {} -> {} (saved {})", 
@@ -488,7 +488,7 @@ pub fn fix_and_limit_messages_history(
     sampling_parameters_to_patch: &mut SamplingParameters,
     n_ctx: usize,
     tools_description: Option<String>,
-    model_name: &str,
+    model_id: &str,
 ) -> Result<(Vec<ChatMessage>, CompressionStrength), String> {
     let start_time = Instant::now();
     
@@ -516,7 +516,7 @@ pub fn fix_and_limit_messages_history(
         16000
     );
 
-    let (extra_tokens_per_message, _) = get_model_token_params(model_name);
+    let (extra_tokens_per_message, _) = get_model_token_params(model_id);
     let mut token_cache = TokenCountCache::new();
     let mut token_counts: Vec<i32> = Vec::with_capacity(mutable_messages.len());
     for msg in &mutable_messages {
@@ -532,7 +532,7 @@ pub fn fix_and_limit_messages_history(
     tracing::info!("Calculated undroppable_msg_n = {} (last user message)", undroppable_msg_n);
     let outlier_threshold = 1000;
     let (mut occupied_tokens, mut tokens_limit) = 
-        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_name);
+        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_id);
     tracing::info!("Before compression: occupied_tokens={} vs tokens_limit={}", occupied_tokens, tokens_limit);
     
     // STAGE 1: Compress ContextFile messages before the last user message
@@ -550,7 +550,7 @@ pub fn fix_and_limit_messages_history(
             1, // Start from index 1 to preserve the initial message
             stage1_end,
             "Stage 1: Compressing ContextFile messages before the last user message",
-            model_name,
+            model_id,
             |i, msg, _| i != 0 && msg.role == "context_file" && !preserve_in_later_stages[i],
             true
         )?;
@@ -579,7 +579,7 @@ pub fn fix_and_limit_messages_history(
             1, // Start from index 1 to preserve the initial message
             stage2_end,
             "Stage 2: Compressing Tool Result messages before the last user message",
-            model_name,
+            model_id,
             |i, msg, _| i != 0 && msg.role == "tool",
             true
         )?;
@@ -608,7 +608,7 @@ pub fn fix_and_limit_messages_history(
             1, // Start from index 1 to preserve the initial message
             stage3_end,
             "Stage 3: Compressing outlier messages before the last user message",
-            model_name,
+            model_id,
             |i, msg, token_count| {
                 i != 0 && 
                 token_count > outlier_threshold && 
@@ -717,7 +717,7 @@ pub fn fix_and_limit_messages_history(
             undroppable_msg_n,
             msg_len,
             "Stage 5: Compressing ContextFile messages after the last user message (last resort)",
-            model_name,
+            model_id,
             |_, msg, _| msg.role == "context_file",
             true
         )?;
@@ -744,7 +744,7 @@ pub fn fix_and_limit_messages_history(
             undroppable_msg_n,
             msg_len,
             "Stage 6: Compressing Tool Result messages after the last user message (last resort)",
-            model_name,
+            model_id,
             |_, msg, _| msg.role == "tool",
             true
         )?;
@@ -772,7 +772,7 @@ pub fn fix_and_limit_messages_history(
             undroppable_msg_n,
             msg_len,
             "Stage 7: Compressing outlier messages in the last conversation block (last resort)",
-            model_name,
+            model_id,
             |i, msg, token_count| {
                 i >= undroppable_msg_n &&
                 token_count > outlier_threshold && 
@@ -791,7 +791,7 @@ pub fn fix_and_limit_messages_history(
 
     remove_invalid_tool_calls_and_tool_calls_results(&mut mutable_messages);
     let (occupied_tokens, tokens_limit) =
-        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_name);
+        recalculate_token_limits(&token_counts, tools_description_tokens, n_ctx, sampling_parameters_to_patch.max_new_tokens, model_id);
     tracing::info!("Final occupied_tokens={} <= tokens_limit={}", occupied_tokens, tokens_limit);
 
     // If we're still over the limit after all compression stages, return an error
@@ -1248,14 +1248,22 @@ mod tests {
 
     impl HasTokenizerAndEot {
         fn mock() -> Arc<Self> {
-            use std::sync::RwLock;
             use tokenizers::Tokenizer;
             use tokenizers::models::wordpiece::WordPiece;
-            let wordpiece = WordPiece::default();
+            use std::collections::HashMap;
+
+            let mut vocab = HashMap::new();
+            vocab.insert("[UNK]".to_string(), 0);
+
+            let wordpiece = WordPiece::builder()
+                .vocab(vocab)
+                .unk_token("[UNK]".to_string())
+                .build()
+                .unwrap();
             let mock_tokenizer = Tokenizer::new(wordpiece);
 
             Arc::new(Self {
-                tokenizer: Arc::new(RwLock::new(mock_tokenizer)),
+                tokenizer: Some(Arc::new(mock_tokenizer)),
                 eot: "".to_string(),
                 eos: "".to_string(),
                 context_format: "".to_string(),
diff --git a/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs b/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs
index 2a310c656..069b7bca1 100644
--- a/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs
+++ b/refact-agent/engine/src/scratchpads/chat_utils_prompts.rs
@@ -19,12 +19,7 @@ pub async fn get_default_system_prompt(
     let mut error_log = Vec::new();
     let tconfig = crate::yaml_configs::customization_loader::load_customization(gcx.clone(), true, &mut error_log).await;
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
     let prompt_key = match chat_mode {
         ChatMode::NO_TOOLS => "default",
diff --git a/refact-agent/engine/src/scratchpads/code_completion_fim.rs b/refact-agent/engine/src/scratchpads/code_completion_fim.rs
index 2ef1f5a26..90f46367f 100644
--- a/refact-agent/engine/src/scratchpads/code_completion_fim.rs
+++ b/refact-agent/engine/src/scratchpads/code_completion_fim.rs
@@ -39,7 +39,7 @@ pub struct FillInTheMiddleScratchpad {
 
 impl FillInTheMiddleScratchpad {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &CodeCompletionPost,
         order: String,
         cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
@@ -91,12 +91,14 @@ impl ScratchpadAbstract for FillInTheMiddleScratchpad {
         self.t.eos = patch.get("eos").and_then(|x| x.as_str()).unwrap_or("").to_string();
         self.t.context_format = patch.get("context_format").and_then(|x| x.as_str()).unwrap_or_default().to_string();
         self.t.rag_ratio = patch.get("rag_ratio").and_then(|x| x.as_f64()).unwrap_or(0.5);
-        self.t.assert_one_token(&self.fim_prefix.as_str())?;
-        self.t.assert_one_token(&self.fim_suffix.as_str())?;
-        self.t.assert_one_token(&self.fim_middle.as_str())?;
-        self.t.assert_one_token(&self.t.eot.as_str())?;
-        if !self.t.eos.is_empty() {
-            self.t.assert_one_token(&self.t.eos.as_str())?;
+        if self.t.tokenizer.is_some() {
+            self.t.assert_one_token(&self.fim_prefix.as_str())?;
+            self.t.assert_one_token(&self.fim_suffix.as_str())?;
+            self.t.assert_one_token(&self.fim_middle.as_str())?;
+            self.t.assert_one_token(&self.t.eot.as_str())?;
+            if !self.t.eos.is_empty() {
+                self.t.assert_one_token(&self.t.eos.as_str())?;
+            }
         }
         Ok(())
     }
diff --git a/refact-agent/engine/src/scratchpads/code_completion_replace.rs b/refact-agent/engine/src/scratchpads/code_completion_replace.rs
index 2e232c85c..7acf0ab2f 100644
--- a/refact-agent/engine/src/scratchpads/code_completion_replace.rs
+++ b/refact-agent/engine/src/scratchpads/code_completion_replace.rs
@@ -3,6 +3,7 @@ use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{
     ChatContent, ChatMessage, CodeCompletionPost, CursorPosition, SamplingParameters,
 };
+use crate::caps::resolve_completion_model;
 use crate::completion_cache;
 use crate::global_context::GlobalContext;
 use crate::scratchpad_abstract::{FinishReason, HasTokenizerAndEot, ScratchpadAbstract};
@@ -201,14 +202,16 @@ async fn prepare_subblock(
     if let Some(symbol) = get_cursor_symbol_from_doc(ast_service.clone(), cpath, cursor_pos).await {
         let min_rows_to_include = 2;
         for idx in symbol.full_line1().saturating_sub(1)..symbol.full_line2() + 1 {
-            let line = file_text.line(idx).to_string();
-            tokens_used += tokenizer.count_tokens(&line).unwrap_or(0) as usize;
-            if idx < cursor_pos.line as usize {
-                subblock.before_lines.push(line);
-            } else if idx > cursor_pos.line as usize {
-                subblock.after_lines_extra.push(line.clone());
-                if tokens_used <= max_tokens || subblock.after_lines.len() < min_rows_to_include {
-                    subblock.after_lines.push(line);
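+            // a symbol's line range can extend past the end of the file, so each
+            // index is bound-checked before reading from the rope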
+            if idx < file_text.len_lines() {
+                let line = file_text.line(idx).to_string();
+                tokens_used += tokenizer.count_tokens(&line).unwrap_or(0) as usize;
+                if idx < cursor_pos.line as usize {
+                    subblock.before_lines.push(line);
+                } else if idx > cursor_pos.line as usize {
+                    subblock.after_lines_extra.push(line.clone());
+                    if tokens_used <= max_tokens || subblock.after_lines.len() < min_rows_to_include {
+                        subblock.after_lines.push(line);
+                    }
                 }
             }
         }
@@ -556,7 +559,7 @@ pub struct CodeCompletionReplaceScratchpad {
 
 impl CodeCompletionReplaceScratchpad {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &CodeCompletionPost,
         cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
         tele_storage: Arc<StdRwLock<telemetry_structs::Storage>>,
@@ -646,17 +649,19 @@ impl ScratchpadAbstract for CodeCompletionReplaceScratchpad {
             .get("rag_ratio")
             .and_then(|x| x.as_f64())
             .unwrap_or(0.5);
-        if !self.token_bos.is_empty() {
-            self.t.assert_one_token(&self.token_bos.as_str())?;
-        }
-        if !self.token_esc.is_empty() {
-            self.t.assert_one_token(&self.token_esc.as_str())?;
-        }
-        if !self.t.eot.is_empty() {
-            self.t.assert_one_token(&self.t.eot.as_str())?;
-        }
-        if !self.t.eos.is_empty() {
-            self.t.assert_one_token(&self.t.eos.as_str())?;
+        if self.t.tokenizer.is_some() {
+            if !self.token_bos.is_empty() {
+                self.t.assert_one_token(&self.token_bos.as_str())?;
+            }
+            if !self.token_esc.is_empty() {
+                self.t.assert_one_token(&self.token_esc.as_str())?;
+            }
+            if !self.t.eot.is_empty() {
+                self.t.assert_one_token(&self.t.eot.as_str())?;
+            }
+            if !self.t.eos.is_empty() {
+                self.t.assert_one_token(&self.t.eos.as_str())?;
+            }
         }
         Ok(())
     }
@@ -843,7 +848,7 @@ pub struct CodeCompletionReplacePassthroughScratchpad {
 
 impl CodeCompletionReplacePassthroughScratchpad {
     pub fn new(
-        tokenizer: Arc<StdRwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         post: &CodeCompletionPost,
         cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
         tele_storage: Arc<StdRwLock<telemetry_structs::Storage>>,
@@ -891,10 +896,11 @@ impl ScratchpadAbstract for CodeCompletionReplacePassthroughScratchpad {
         ccx: Arc<AMutex<AtCommandsContext>>,
         sampling_parameters_to_patch: &mut SamplingParameters,
     ) -> Result<String, String> {
-        let (n_ctx, _gcx) = {
+        let (n_ctx, gcx) = {
             let ccx_locked = ccx.lock().await;
             (ccx_locked.n_ctx, ccx_locked.global_context.clone())
         };
+        let caps = gcx.read().await.caps.clone().ok_or_else(|| "No caps".to_string())?;
         let completion_t0 = Instant::now();
         let use_rag = self.t.rag_ratio > 0.0 && self.post.use_ast && self.ast_service.is_some();
         sampling_parameters_to_patch.max_new_tokens = MAX_NEW_TOKENS;
@@ -1005,8 +1011,9 @@ impl ScratchpadAbstract for CodeCompletionReplacePassthroughScratchpad {
             ..Default::default()
         });
 
+        let model = resolve_completion_model(caps.clone(), &self.post.model, true)?;
         let json_messages = &serde_json::to_string(&json!({
-            "messages":  messages.iter().map(|x| { x.into_value(&None) }).collect::>(),
+            "messages":  messages.iter().map(|x| { x.into_value(&None, &model.base.id) }).collect::>(),
         }))
         .unwrap();
         let prompt = format!("PASSTHROUGH {json_messages}").to_string();
diff --git a/refact-agent/engine/src/scratchpads/mod.rs b/refact-agent/engine/src/scratchpads/mod.rs
index 0b41eb8e3..7ac1b0831 100644
--- a/refact-agent/engine/src/scratchpads/mod.rs
+++ b/refact-agent/engine/src/scratchpads/mod.rs
@@ -1,7 +1,6 @@
 use std::sync::Arc;
 use std::sync::RwLock as StdRwLock;
 use tokio::sync::{Mutex as AMutex, RwLock as ARwLock};
-use tokenizers::Tokenizer;
 
 pub mod code_completion_fim;
 pub mod chat_generic;
@@ -20,12 +19,13 @@ mod completon_rag;
 use crate::ast::ast_indexer_thread::AstIndexService;
 use crate::call_validation::{ChatMessage, CodeCompletionPost};
 use crate::call_validation::ChatPost;
+use crate::caps::ChatModelRecord;
+use crate::caps::CompletionModelRecord;
 use crate::global_context::GlobalContext;
-use crate::caps::CodeAssistantCaps;
 use crate::scratchpad_abstract::ScratchpadAbstract;
 use crate::completion_cache;
 use crate::telemetry::telemetry_structs;
-use crate::cached_tokenizers;
+use crate::tokens;
 
 
 fn verify_has_send<T: Send>(_x: &T) {}
@@ -33,66 +33,58 @@ fn verify_has_send<T: Send>(_x: &T) {}
 
 pub async fn create_code_completion_scratchpad(
     global_context: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    model_name_for_tokenizer: String,
+    model_rec: &CompletionModelRecord,
     post: &CodeCompletionPost,
-    scratchpad_name: &str,
-    scratchpad_patch: &serde_json::Value,
     cache_arc: Arc<StdRwLock<completion_cache::CompletionCache>>,
     tele_storage: Arc<StdRwLock<telemetry_structs::Storage>>,
     ast_module: Option<Arc<AMutex<AstIndexService>>>,
 ) -> Result<Box<dyn ScratchpadAbstract>, String> {
     let mut result: Box<dyn ScratchpadAbstract>;
-    let tokenizer_arc: Arc<StdRwLock<Tokenizer>> = cached_tokenizers::cached_tokenizer(caps, global_context.clone(), model_name_for_tokenizer).await?;
-    if scratchpad_name == "FIM-PSM" {
+    let tokenizer_arc = crate::tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await?;
+    if model_rec.scratchpad == "FIM-PSM" {
         result = Box::new(code_completion_fim::FillInTheMiddleScratchpad::new(
             tokenizer_arc, &post, "PSM".to_string(), cache_arc, tele_storage, ast_module, global_context.clone()
         ))
-    } else if scratchpad_name == "FIM-SPM" {
+    } else if model_rec.scratchpad == "FIM-SPM" {
         result = Box::new(code_completion_fim::FillInTheMiddleScratchpad::new(
             tokenizer_arc, &post, "SPM".to_string(), cache_arc, tele_storage, ast_module, global_context.clone()
         ))
-    } else if scratchpad_name == "REPLACE" {
+    } else if model_rec.scratchpad == "REPLACE" {
         result = Box::new(code_completion_replace::CodeCompletionReplaceScratchpad::new(
             tokenizer_arc, &post, cache_arc, tele_storage, ast_module, global_context.clone()
         ))
-    } else if scratchpad_name == "REPLACE_PASSTHROUGH" {
+    } else if model_rec.scratchpad == "REPLACE_PASSTHROUGH" {
         result = Box::new(code_completion_replace::CodeCompletionReplacePassthroughScratchpad::new(
             tokenizer_arc, &post, cache_arc, tele_storage, ast_module, global_context.clone()
         ))
     } else {
-        return Err(format!("This rust binary doesn't have code completion scratchpad \"{}\" compiled in", scratchpad_name));
+        return Err(format!("This rust binary doesn't have code completion scratchpad \"{}\" compiled in", model_rec.scratchpad));
     }
-    result.apply_model_adaptation_patch(scratchpad_patch, false, false).await?;
+    result.apply_model_adaptation_patch(&model_rec.scratchpad_patch, false, false).await?;
     verify_has_send(&result);
     Ok(result)
 }
 
 pub async fn create_chat_scratchpad(
     global_context: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<StdRwLock<CodeAssistantCaps>>,
-    model_name_for_tokenizer: String,
     post: &mut ChatPost,
     messages: &Vec<ChatMessage>,
     prepend_system_prompt: bool,
-    scratchpad_name: &str,
-    scratchpad_patch: &serde_json::Value,
+    model_rec: &ChatModelRecord,
     allow_at: bool,
-    supports_tools: bool,
-    supports_clicks: bool,
 ) -> Result<Box<dyn ScratchpadAbstract>, String> {
     let mut result: Box<dyn ScratchpadAbstract>;
-    let tokenizer_arc = cached_tokenizers::cached_tokenizer(caps, global_context.clone(), model_name_for_tokenizer).await?;
-    if scratchpad_name == "CHAT-GENERIC" {
+    let tokenizer_arc = tokens::cached_tokenizer(global_context.clone(), &model_rec.base).await?;
+    if model_rec.scratchpad == "CHAT-GENERIC" {
         result = Box::new(chat_generic::GenericChatScratchpad::new(
             tokenizer_arc.clone(), post, messages, prepend_system_prompt, allow_at
         ));
-    } else if scratchpad_name == "PASSTHROUGH" {
+    } else if model_rec.scratchpad == "PASSTHROUGH" {
         result = Box::new(chat_passthrough::ChatPassthrough::new(
-            tokenizer_arc.clone(), post, messages, prepend_system_prompt, allow_at, supports_tools, supports_clicks
+            tokenizer_arc.clone(), post, messages, prepend_system_prompt, allow_at, model_rec.supports_tools, model_rec.supports_clicks
         ));
     } else {
-        return Err(format!("This rust binary doesn't have chat scratchpad \"{}\" compiled in", scratchpad_name));
+        return Err(format!("This rust binary doesn't have chat scratchpad \"{}\" compiled in", model_rec.scratchpad));
     }
     let mut exploration_tools: bool = false;
     let mut agentic_tools: bool = false;
@@ -111,7 +103,7 @@ pub async fn create_chat_scratchpad(
             }
         }
     }
-    result.apply_model_adaptation_patch(scratchpad_patch, exploration_tools, agentic_tools).await?;
+    result.apply_model_adaptation_patch(&model_rec.scratchpad_patch, exploration_tools, agentic_tools).await?;
     verify_has_send(&result);
     Ok(result)
 }
diff --git a/refact-agent/engine/src/scratchpads/multimodality.rs b/refact-agent/engine/src/scratchpads/multimodality.rs
index 5365c90e0..d03d413e9 100644
--- a/refact-agent/engine/src/scratchpads/multimodality.rs
+++ b/refact-agent/engine/src/scratchpads/multimodality.rs
@@ -1,9 +1,10 @@
 use serde::{Deserialize, Deserializer, Serialize};
-use std::sync::{Arc, RwLock, RwLockReadGuard};
+use std::sync::Arc;
 use serde_json::{json, Value};
 use tokenizers::Tokenizer;
 use crate::call_validation::{ChatContent, ChatMessage, ChatToolCall};
-use crate::scratchpads::scratchpad_utils::{calculate_image_tokens_openai, count_tokens as count_tokens_simple_text, image_reader_from_b64string, parse_image_b64_from_image_url_openai};
+use crate::scratchpads::scratchpad_utils::{calculate_image_tokens_openai, image_reader_from_b64string, parse_image_b64_from_image_url_openai};
+use crate::tokens::count_text_tokens;
 
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
@@ -76,13 +77,9 @@ impl MultimodalElement {
         })
     }
 
-    pub fn count_tokens(&self, tokenizer: Option<&RwLockReadGuard<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
+    pub fn count_tokens(&self, tokenizer: Option<Arc<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
         if self.is_text() {
-            if let Some(tokenizer) = tokenizer {
-                Ok(count_tokens_simple_text(&tokenizer, &self.m_content) as i32)
-            } else {
-                return Err("count_tokens() received no tokenizer".to_string());
-            }
+            Ok(count_text_tokens(tokenizer, &self.m_content)? as i32)
         } else if self.is_image() {
             let style = style.clone().unwrap_or("openai".to_string());
             match style.as_str() {
@@ -157,6 +154,13 @@ impl ChatContentRaw {
             }
         }
     }
+
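+    // lets ChatMessage::into_value() skip the "content" field entirely for
+    // providers that reject empty strings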
+    pub fn is_empty(&self) -> bool {
+        match self {
+            ChatContentRaw::SimpleText(text) => text.is_empty(),
+            ChatContentRaw::Multimodal(elements) => elements.is_empty(),
+        }
+    }
 }
 
 impl ChatContent {
@@ -171,7 +175,7 @@ impl ChatContent {
         }
     }
 
-    pub fn size_estimate(&self, tokenizer: Arc<RwLock<Tokenizer>>, style: &Option<String>) -> usize {
+    pub fn size_estimate(&self, tokenizer: Option<Arc<Tokenizer>>, style: &Option<String>) -> usize {
         match self {
             ChatContent::SimpleText(text) => text.len(),
             ChatContent::Multimodal(_elements) => {
@@ -181,12 +185,11 @@ impl ChatContent {
         }
     }
 
-    pub fn count_tokens(&self, tokenizer: Arc<RwLock<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
-        let tokenizer_lock = tokenizer.read().unwrap();
+    pub fn count_tokens(&self, tokenizer: Option<Arc<Tokenizer>>, style: &Option<String>) -> Result<i32, String> {
         match self {
-            ChatContent::SimpleText(text) => Ok(count_tokens_simple_text(&tokenizer_lock, text) as i32),
+            ChatContent::SimpleText(text) => Ok(count_text_tokens(tokenizer, text)? as i32),
             ChatContent::Multimodal(elements) => elements.iter()
-                .map(|e|e.count_tokens(Some(&tokenizer_lock), style))
+                .map(|e|e.count_tokens(tokenizer.clone(), style))
                 .collect::<Result<Vec<_>, _>>()
                 .map(|counts| counts.iter().sum()),
         }
@@ -254,14 +257,19 @@ impl ChatMessage {
         }
     }
 
-    pub fn into_value(&self, style: &Option<String>) -> Value {
+    pub fn into_value(&self, style: &Option<String>, model_id: &str) -> Value {
         let mut dict = serde_json::Map::new();
         let chat_content_raw = self.content.into_raw(style);
-
         dict.insert("role".to_string(), Value::String(self.role.clone()));
-        dict.insert("content".to_string(), json!(chat_content_raw));
-        dict.insert("tool_calls".to_string(), json!(self.tool_calls.clone()));
-        dict.insert("tool_call_id".to_string(), Value::String(self.tool_call_id.clone()));
+        if model_supports_empty_strings(model_id) || !chat_content_raw.is_empty() {
+            dict.insert("content".to_string(), json!(chat_content_raw));
+        }
+        if let Some(tool_calls) = self.tool_calls.clone() {
+            dict.insert("tool_calls".to_string(), json!(tool_calls));
+        }
+        if !self.tool_call_id.is_empty() {
+            dict.insert("tool_call_id".to_string(), Value::String(self.tool_call_id.clone()));
+        }
         if let Some(thinking_blocks) = self.thinking_blocks.clone() {
             dict.insert("thinking_blocks".to_string(), json!(thinking_blocks));
         }
@@ -312,3 +320,8 @@ impl<'de> Deserialize<'de> for ChatMessage {
         })
     }
 }
+
+/// Whether the provider API accepts message fields that are empty strings (Gemini does not)
+fn model_supports_empty_strings(model_id: &str) -> bool {
+    !model_id.starts_with("google_gemini/")
+}
\ No newline at end of file
diff --git a/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs b/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs
index eae6727c9..c45dbdfad 100644
--- a/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs
+++ b/refact-agent/engine/src/scratchpads/passthrough_convert_messages.rs
@@ -4,7 +4,7 @@ use tracing::{error, warn};
 use crate::call_validation::{ChatContent, ChatMessage, ContextFile, DiffChunk};
 
 
-pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Option<String>) -> Vec<Value> {
+pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Option<String>, model_id: &str) -> Vec<Value> {
     let mut results = vec![];
     let mut delay_images = vec![];
 
@@ -26,28 +26,28 @@ pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Opt
                     };
                     let mut msg_cloned = msg.clone();
                     msg_cloned.content = ChatContent::SimpleText(text);
-                    results.push(msg_cloned.into_value(&style));
+                    results.push(msg_cloned.into_value(&style, model_id));
                     if !images.is_empty() {
                         let msg_img = ChatMessage {
                             role: "user".to_string(),
                             content: ChatContent::Multimodal(images.into_iter().cloned().collect()),
                             ..Default::default()
                         };
-                        delay_images.push(msg_img.into_value(&style));
+                        delay_images.push(msg_img.into_value(&style, model_id));
                     }
                 },
                 ChatContent::SimpleText(_) => {
-                    results.push(msg.into_value(&style));
+                    results.push(msg.into_value(&style, model_id));
                 }
             }
 
         } else if msg.role == "assistant" || msg.role == "system" {
             flush_delayed_images(&mut results, &mut delay_images);
-            results.push(msg.into_value(&style));
+            results.push(msg.into_value(&style, model_id));
 
         } else if msg.role == "user" {
             flush_delayed_images(&mut results, &mut delay_images);
-            results.push(msg.into_value(&style));
+            results.push(msg.into_value(&style, model_id));
 
         } else if msg.role == "diff" {
             let extra_message = match serde_json::from_str::<Vec<DiffChunk>>(&msg.content.content_text_only()) {
@@ -66,14 +66,14 @@ pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Opt
                 tool_call_id: msg.tool_call_id.clone(),
                 ..Default::default()
             };
-            results.push(tool_msg.into_value(&style));
+            results.push(tool_msg.into_value(&style, model_id));
 
         } else if msg.role == "plain_text" || msg.role == "cd_instruction" {
             flush_delayed_images(&mut results, &mut delay_images);
             results.push(ChatMessage::new(
                 "user".to_string(),
                 msg.content.content_text_only(),
-            ).into_value(&style));
+            ).into_value(&style, model_id));
 
         } else if msg.role == "context_file" {
             flush_delayed_images(&mut results, &mut delay_images);
@@ -87,7 +87,7 @@ pub fn convert_messages_to_openai_format(messages: Vec<ChatMessage>, style: &Opt
                                     context_file.line1,
                                     context_file.line2,
                                     context_file.file_content),
-                        ).into_value(&style));
+                        ).into_value(&style, model_id));
                     }
                 },
                 Err(e) => { error!("error parsing context file: {}", e); }
@@ -190,7 +190,7 @@ mod tests {
         let roles_out_expected = expected_output.iter().map(|x| x.get("role").unwrap().as_str().unwrap().to_string()).collect::<Vec<_>>();
 
         let style = Some("openai".to_string());
-        let output = convert_messages_to_openai_format(messages, &style);
+        let output = convert_messages_to_openai_format(messages, &style, "Refact/gpt-4o");
 
         // println!("OUTPUT: {:#?}", output);
         let roles_out = output.iter().map(|x| x.get("role").unwrap().as_str().unwrap().to_string()).collect::<Vec<_>>();
diff --git a/refact-agent/engine/src/scratchpads/scratchpad_utils.rs b/refact-agent/engine/src/scratchpads/scratchpad_utils.rs
index 4d535c4bb..eb5a797b0 100644
--- a/refact-agent/engine/src/scratchpads/scratchpad_utils.rs
+++ b/refact-agent/engine/src/scratchpads/scratchpad_utils.rs
@@ -2,7 +2,6 @@ use std::io::Cursor;
 use image::ImageReader;
 use regex::Regex;
 use serde_json::Value;
-use tokenizers::Tokenizer;
 use crate::call_validation::{ChatToolCall, ContextFile};
 use crate::postprocessing::pp_context_files::RESERVE_FOR_QUESTION_AND_FOLLOWUP;
 
@@ -34,16 +33,6 @@ impl HasRagResults {
     }
 }
 
-pub fn count_tokens(
-    tokenizer: &Tokenizer,
-    text: &str,
-) -> usize {
-    match tokenizer.encode_fast(text, false) {
-        Ok(tokens) => tokens.len(),
-        Err(_) => 0,
-    }
-}
-
 pub fn parse_image_b64_from_image_url_openai(image_url: &str) -> Option<(String, String, String)> {
     let re = Regex::new(r"data:(image/(png|jpeg|jpg|webp|gif));base64,([A-Za-z0-9+/=]+)").unwrap();
     re.captures(image_url).and_then(|captures| {
diff --git a/refact-agent/engine/src/scratchpads/token_count_cache.rs b/refact-agent/engine/src/scratchpads/token_count_cache.rs
index 327eef2ac..936e4d766 100644
--- a/refact-agent/engine/src/scratchpads/token_count_cache.rs
+++ b/refact-agent/engine/src/scratchpads/token_count_cache.rs
@@ -1,6 +1,5 @@
 use std::collections::HashMap;
 use std::sync::Arc;
-use std::sync::RwLock;
 use tokenizers::Tokenizer;
 use crate::call_validation::ChatMessage;
 
@@ -28,7 +27,7 @@ impl TokenCountCache {
     pub fn get_token_count(
         &mut self,
         msg: &ChatMessage,
-        tokenizer: Arc<RwLock<Tokenizer>>,
+        tokenizer: Option<Arc<Tokenizer>>,
         extra_tokens_per_message: i32,
     ) -> Result<i32, String> {
         let key = Self::cache_key(msg);
diff --git a/refact-agent/engine/src/subchat.rs b/refact-agent/engine/src/subchat.rs
index 2531da670..f5e301ec4 100644
--- a/refact-agent/engine/src/subchat.rs
+++ b/refact-agent/engine/src/subchat.rs
@@ -5,11 +5,12 @@ use tokio::sync::Mutex as AMutex;
 use serde_json::{json, Value};
 use tracing::{error, info, warn};
 
+use crate::caps::resolve_chat_model;
+use crate::caps::ChatModelRecord;
 use crate::tools::tools_description::{tools_merged_and_filtered, tool_description_list_from_yaml};
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{SamplingParameters, PostprocessSettings, ChatPost, ChatMessage, ChatUsage, ChatToolCall, ReasoningEffort};
-use crate::global_context::{GlobalContext, try_load_caps_quickly_if_not_present, is_metadata_supported};
-use crate::http::routers::v1::chat::lookup_chat_scratchpad;
+use crate::global_context::{GlobalContext, try_load_caps_quickly_if_not_present};
 use crate::scratchpad_abstract::ScratchpadAbstract;
 use crate::scratchpads::multimodality::chat_content_raw_from_value;
 use crate::yaml_configs::customization_loader::load_customization;
@@ -21,7 +22,7 @@ const MAX_NEW_TOKENS: usize = 4096;
 pub async fn create_chat_post_and_scratchpad(
     global_context: Arc>,
     ccx: Arc<AMutex<AtCommandsContext>>,
-    model_name: &str,
+    model_id: &str,
     messages: Vec<&ChatMessage>,
     temperature: Option<f32>,
     max_new_tokens: usize,
@@ -32,7 +33,7 @@ pub async fn create_chat_post_and_scratchpad(
     tool_choice: Option<String>,
     only_deterministic_messages: bool,
     _should_execute_remotely: bool,
-) -> Result<(ChatPost, Box<dyn ScratchpadAbstract>), String> {
+) -> Result<(ChatPost, Box<dyn ScratchpadAbstract>, Arc<ChatModelRecord>), String> {
     let caps = try_load_caps_quickly_if_not_present(
         global_context.clone(), 0,
     ).await.map_err(|e| {
@@ -42,12 +43,7 @@ pub async fn create_chat_post_and_scratchpad(
     let mut error_log = Vec::new();
     let tconfig = load_customization(global_context.clone(), true, &mut error_log).await;
     for e in error_log.iter() {
-        tracing::error!(
-            "{}:{} {:?}",
-            crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-            e.error_line,
-            e.error_msg,
-        );
+        tracing::error!("{e}");
     }
 
     let mut chat_post = ChatPost {
@@ -61,8 +57,7 @@ pub async fn create_chat_post_and_scratchpad(
             reasoning_effort,
             ..Default::default()  // TODO
         },
-        model: model_name.to_string(),
-        scratchpad: "".to_string(),
+        model: model_id.to_string(),
         stream: Some(false),
         temperature,
         n: Some(n),
@@ -74,38 +69,29 @@ pub async fn create_chat_post_and_scratchpad(
         ..Default::default()
     };
 
-    let (model_name, scratchpad_name, scratchpad_patch, n_ctx, supports_tools, _supports_multimodality, supports_clicks) = lookup_chat_scratchpad(
-        caps.clone(),
-        &chat_post,
-    ).await?;
+    let model_rec = resolve_chat_model(caps, model_id)?;
 
-    if !supports_tools {
-        warn!("supports_tools is false");
+    if !model_rec.supports_tools {
+        tracing::warn!("supports_tools is false");
     }
 
-    chat_post.max_tokens = Some(n_ctx);
-    chat_post.scratchpad = scratchpad_name.clone();
+    chat_post.max_tokens = Some(model_rec.base.n_ctx);
 
     {
         let mut ccx_locked = ccx.lock().await;
-        ccx_locked.current_model = model_name.to_string();
+        ccx_locked.current_model = model_id.to_string();
     }
 
     let scratchpad = crate::scratchpads::create_chat_scratchpad(
         global_context.clone(),
-        caps,
-        model_name.to_string(),
         &mut chat_post,
         &messages.into_iter().cloned().collect::>(),
         prepend_system_prompt,
-        &scratchpad_name,
-        &scratchpad_patch,
+        &model_rec,
         false,
-        supports_tools,
-        supports_clicks,
     ).await?;
 
-    Ok((chat_post, scratchpad))
+    Ok((chat_post, scratchpad, model_rec))
 }
 
 #[allow(dead_code)]
@@ -116,16 +102,14 @@ async fn chat_interaction_stream() {
 async fn chat_interaction_non_stream(
     ccx: Arc<AMutex<AtCommandsContext>>,
     mut spad: Box<dyn ScratchpadAbstract>,
+    model_rec: &ChatModelRecord,
     prompt: &String,
     chat_post: &ChatPost,
 ) -> Result<Vec<Vec<ChatMessage>>, String> {
-    let meta = {
-        let gcx = ccx.lock().await.global_context.clone();
-        if is_metadata_supported(gcx).await {
-            Some(chat_post.meta.clone())
-        } else {
-            None
-        }
+    let meta = if model_rec.base.support_metadata {
+        Some(chat_post.meta.clone())
+    } else {
+        None
     };
     
     let t1 = std::time::Instant::now();
@@ -134,7 +118,7 @@ async fn chat_interaction_non_stream(
         &mut spad,
         "chat".to_string(),
         prompt,
-        chat_post.model.clone(),
+        &model_rec.base,
         &chat_post.parameters,   // careful: includes n
         chat_post.only_deterministic_messages,
         meta
@@ -234,6 +218,7 @@ async fn chat_interaction_non_stream(
 pub async fn chat_interaction(
     ccx: Arc<AMutex<AtCommandsContext>>,
     mut spad: Box<dyn ScratchpadAbstract>,
+    model_rec: &ChatModelRecord,
     chat_post: &mut ChatPost,
 ) -> Result<Vec<Vec<ChatMessage>>, String> {
     let prompt = spad.prompt(ccx.clone(), &mut chat_post.parameters).await?;
@@ -244,6 +229,7 @@ pub async fn chat_interaction(
     Ok(chat_interaction_non_stream(
         ccx.clone(),
         spad,
+        model_rec,
         &prompt,
         chat_post,
     ).await?)
@@ -264,7 +250,7 @@ fn update_usage_from_messages(usage: &mut ChatUsage, messages: &Vec<Vec<ChatMessage>>,
-    model_name: &str,
+    model_id: &str,
     messages: Vec<ChatMessage>,
     tools_subset: Option<Vec<String>>,
     tool_choice: Option<String>,
@@ -295,16 +281,16 @@ pub async fn subchat_single(
         error!("Error loading compiled_in_tools: {:?}", e);
         vec![]
     });
-    let tools = tools_desclist.into_iter().filter(|x| x.is_supported_by(model_name)).map(|x|x.into_openai_style()).collect::<Vec<_>>();
+    let tools = tools_desclist.into_iter().filter(|x| x.is_supported_by(model_id)).map(|x|x.into_openai_style()).collect::<Vec<_>>();
     info!("tools_subset {:?}", tools_subset);
     info!("tools_turned_on_by_cmdline_set {:?}", tools_turned_on_by_cmdline_set);
     info!("tools_on_intersection {:?}", tools_on_intersection);
 
     let max_new_tokens = max_new_tokens.unwrap_or(MAX_NEW_TOKENS);
-    let (mut chat_post, spad) = create_chat_post_and_scratchpad(
+    let (mut chat_post, spad, model_rec) = create_chat_post_and_scratchpad(
         gcx.clone(),
         ccx.clone(),
-        model_name,
+        model_id,
         messages.iter().collect::<Vec<_>>(),
         temperature,
         max_new_tokens,
@@ -317,7 +303,7 @@ pub async fn subchat_single(
         should_execute_remotely,
     ).await?;
 
-    let chat_response_msgs = chat_interaction(ccx.clone(), spad, &mut chat_post).await?;
+    let chat_response_msgs = chat_interaction(ccx.clone(), spad, &model_rec, &mut chat_post).await?;
 
     let old_messages = messages.clone();
     // no need to remove user from old_messages here, because allow_at is false
@@ -355,7 +341,7 @@ pub async fn subchat_single(
 
 pub async fn subchat(
     ccx: Arc<AMutex<AtCommandsContext>>,
-    model_name: &str,
+    model_id: &str,
     messages: Vec<ChatMessage>,
     tools_subset: Vec<String>,
     wrap_up_depth: usize,
@@ -393,7 +379,7 @@ pub async fn subchat(
             }
             messages = subchat_single(
                 ccx.clone(),
-                model_name,
+                model_id,
                 messages.clone(),
                 Some(tools_subset.clone()),
                 Some("auto".to_string()),
@@ -416,7 +402,7 @@ pub async fn subchat(
         if !tool_calls.is_empty() {
             messages = subchat_single(
                 ccx.clone(),
-                model_name,
+                model_id,
                 messages,
                 Some(vec![]),
                 Some("none".to_string()),
@@ -435,7 +421,7 @@ pub async fn subchat(
     messages.push(ChatMessage::new("user".to_string(), wrap_up_prompt.to_string()));
     let choices = subchat_single(
         ccx.clone(),
-        model_name,
+        model_id,
         messages,
         Some(tools_subset.clone()),
         Some("auto".to_string()),
@@ -455,7 +441,7 @@ pub async fn subchat(
             if !tool_calls.is_empty() {
                 _ = subchat_single(
                     ccx.clone(),
-                    model_name,
+                    model_id,
                     messages.clone(),
                     Some(vec![]),
                     Some("none".to_string()),
diff --git a/refact-agent/engine/src/telemetry/basic_transmit.rs b/refact-agent/engine/src/telemetry/basic_transmit.rs
index ea3ba1e9a..d46a91643 100644
--- a/refact-agent/engine/src/telemetry/basic_transmit.rs
+++ b/refact-agent/engine/src/telemetry/basic_transmit.rs
@@ -1,5 +1,5 @@
 use tracing::{error, info};
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use std::path::PathBuf;
 use serde_json::json;
 
@@ -107,7 +107,7 @@ pub async fn basic_telemetry_compress(
 
 pub async fn basic_telemetry_send(
     global_context: Arc<ARwLock<GlobalContext>>,
-    caps: Arc<RwLock<CodeAssistantCaps>>,
+    caps: Arc<CodeAssistantCaps>,
 ) -> () {
     let (cache_dir, api_key, enable_basic_telemetry) = {
         let cx = global_context.write().await;
@@ -119,13 +119,11 @@ pub async fn basic_telemetry_send(
     };
     let (dir_compressed, dir_sent) = telemetry_storage_dirs(&cache_dir).await;
 
-    let telemetry_basic_dest = caps.read().unwrap().telemetry_basic_dest.clone();
-
-    if enable_basic_telemetry && !telemetry_basic_dest.is_empty() {
+    if enable_basic_telemetry && !caps.telemetry_basic_dest.is_empty() {
         send_telemetry_files_to_mothership(
             dir_compressed.clone(),
             dir_sent.clone(),
-            telemetry_basic_dest.clone(),
+            caps.telemetry_basic_dest.clone(),
             api_key,
             global_context.clone()
         ).await;
@@ -133,7 +131,7 @@ pub async fn basic_telemetry_send(
         if !enable_basic_telemetry {
             info!("basic telemetry sending not enabled, skip");
         }
-        if telemetry_basic_dest.is_empty() {
+        if caps.telemetry_basic_dest.is_empty() {
             info!("basic telemetry dest is empty, skip");
         }
     }
diff --git a/refact-agent/engine/src/tokens.rs b/refact-agent/engine/src/tokens.rs
new file mode 100644
index 000000000..0a7b7f438
--- /dev/null
+++ b/refact-agent/engine/src/tokens.rs
@@ -0,0 +1,226 @@
+use tokio::io::AsyncWriteExt;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::sync::RwLock as ARwLock;
+use tokio::sync::Mutex as AMutex;
+use tokenizers::Tokenizer;
+use reqwest::header::AUTHORIZATION;
+use reqwest::Response;
+use uuid::Uuid;
+
+use crate::custom_error::MapErrToString;
+use crate::files_correction::canonical_path;
+use crate::global_context::GlobalContext;
+use crate::caps::{default_hf_tokenizer_template, strip_model_from_finetune, BaseModelRecord};
+
+
+async fn try_open_tokenizer(
+    res: Response,
+    to: impl AsRef<Path>,
+) -> Result<(), String> {
+    let mut file = tokio::fs::OpenOptions::new()
+        .write(true)
+        .create(true)
+        .open(&to)
+        .await
+        .map_err(|e| format!("failed to open file: {}", e))?;
+    file.write_all(&res.bytes().await
+        .map_err(|e| format!("failed to fetch bytes: {}", e))?
+    ).await.map_err(|e| format!("failed to write to file: {}", e))?;
+    file.flush().await.map_err(|e| format!("failed to flush file: {}", e))?;
+    tracing::info!("saved tokenizer to {}", to.as_ref().display());
+    Ok(())
+}
+
+async fn download_tokenizer_file(
+    http_client: &reqwest::Client,
+    http_path: &str,
+    tokenizer_api_token: &str,
+    to: &Path,
+) -> Result<(), String> {
+    tokio::fs::create_dir_all(
+        to.parent().ok_or_else(|| "tokenizer path has no parent")?,
+    ).await.map_err(|e| format!("failed to create parent dir: {}", e))?;
+    if to.exists() {
+        return Ok(());
+    }
+
+    tracing::info!("downloading tokenizer from {}", http_path);
+    let mut req = http_client.get(http_path);
+    
+    if !tokenizer_api_token.is_empty() {
+        req = req.header(AUTHORIZATION, format!("Bearer {tokenizer_api_token}"))
+    }
+    
+    let res = req
+        .send()
+        .await
+        .map_err(|e| format!("failed to get response: {}", e))?
+        .error_for_status()
+        .map_err(|e| format!("failed to get response: {}", e))?;
+    try_open_tokenizer(res, to).await?;
+    Ok(())
+}
+
+fn check_json_file(path: &Path) -> bool {
+    Tokenizer::from_file(path).is_ok()
+}
+
+async fn try_download_tokenizer_file_and_open(
+    http_client: &reqwest::Client,
+    http_path: &str,
+    tokenizer_api_token: &str,
+    path: &Path,
+) -> Result<(), String> {
+    if path.exists() && check_json_file(path) {
+        return Ok(());
+    }
+
+    let tmp_file = std::env::temp_dir().join(Uuid::new_v4().to_string());
+    let tmp_path = tmp_file.as_path();
+    
+    // Track the last error message
+    let mut last_error = String::from("");
+    for i in 0..15 {
+        if i != 0 {
+            tokio::time::sleep(Duration::from_millis(200)).await;
+        }
+        let res = download_tokenizer_file(http_client, http_path, tokenizer_api_token, tmp_path).await;
+        if let Err(err_msg) = res {
+            last_error = format!("failed to download tokenizer: {}", err_msg);
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        let parent = path.parent();
+        if parent.is_none() {
+            last_error = String::from("failed to download tokenizer: parent is not set");
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        let res = tokio::fs::create_dir_all(parent.unwrap()).await;
+        if let Err(err_msg) = res {
+            last_error = format!("failed to create parent dir: {}", err_msg);
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        if !check_json_file(tmp_path) {
+            last_error = String::from("failed to download tokenizer: file is not a tokenizer");
+            tracing::error!("{last_error}");
+            continue;
+        }
+
+        match tokio::fs::copy(tmp_path, path).await {
+            Ok(_) => {
+                tracing::info!("moved tokenizer to {}", path.display());
+                return Ok(());
+            },
+            Err(e) => { 
+                last_error = format!("failed to copy tokenizer file: {}", e);
+                tracing::error!("{last_error}");
+                continue; 
+            }
+        }
+    }
+    Err(last_error)
+}
+
+pub async fn cached_tokenizer(
+    global_context: Arc<ARwLock<GlobalContext>>,
+    model_rec: &BaseModelRecord,
+) -> Result<Option<Arc<Tokenizer>>, String> {
+    let model_id = strip_model_from_finetune(&model_rec.id);
+    let tokenizer_download_lock: Arc<AMutex<bool>> = global_context.read().await.tokenizer_download_lock.clone();
+    let _tokenizer_download_locked = tokenizer_download_lock.lock().await;
+
+    let (client2, cache_dir, tokenizer_in_gcx, hf_tokenizer_template) = {
+        let cx_locked = global_context.read().await;
+        let template = cx_locked.caps.clone().map(|caps| caps.hf_tokenizer_template.clone())
+            .unwrap_or_else(default_hf_tokenizer_template);
+        (cx_locked.http_client.clone(), cx_locked.cache_dir.clone(), cx_locked.tokenizer_map.get(&model_id).cloned(), template)
+    };
+
+    if let Some(tokenizer) = tokenizer_in_gcx {
+        return Ok(tokenizer)
+    }
+
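+    // A model's tokenizer can be declared as: "fake" (no real tokenizer, token counts
+    // are estimated), "hf://<model>" (resolved through the caps HF template URL),
+    // a plain http(s) URL, or a local path / file:// URL; an empty value is an error.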
+    let (mut tok_file_path, tok_url) = match &model_rec.tokenizer {
+        empty_tok if empty_tok.is_empty() => return Err(format!("failed to load tokenizer: empty tokenizer for {model_id}")),
+        fake_tok if fake_tok.starts_with("fake") => return Ok(None),
+        hf_tok if hf_tok.starts_with("hf://") => {
+            let hf_model = hf_tok.strip_prefix("hf://").unwrap();
+            let url = hf_tokenizer_template.replace("$HF_MODEL", hf_model);
+            (PathBuf::new(), url)
+        }
+        http_tok if http_tok.starts_with("http://") || http_tok.starts_with("https://") => {
+            (PathBuf::new(), http_tok.to_string())
+        }
+        file_tok => {
+            let file = if file_tok.starts_with("file://") {
+                url::Url::parse(file_tok)
+                    .and_then(|url| url.to_file_path().map_err(|_| url::ParseError::EmptyHost))
+                    .map_err_with_prefix(format!("Invalid path URL {file_tok}:"))?
+            } else {
+                canonical_path(file_tok)
+            };
+            (canonical_path(file.to_string_lossy()), "".to_string())
+        }
+    };
+
+    if tok_file_path.as_os_str().is_empty() {
+        let tokenizer_cache_dir = std::path::PathBuf::from(cache_dir).join("tokenizers");
+        let sanitized_model_id = model_id.chars()
+            .map(|c| if c.is_alphanumeric() { c } else { '_' })
+            .collect::<String>();
+        
+        tok_file_path = tokenizer_cache_dir.join(&sanitized_model_id).join("tokenizer.json");
+
+        try_download_tokenizer_file_and_open(&client2, &tok_url, &model_rec.tokenizer_api_key, &tok_file_path).await?;
+    }
+    
+    tracing::info!("loading tokenizer \"{}\"", tok_file_path.display());
+    let mut tokenizer = Tokenizer::from_file(tok_file_path)
+        .map_err(|e| format!("failed to load tokenizer: {}", e))?;
+    let _ = tokenizer.with_truncation(None);
+    tokenizer.with_padding(None);
+    let arc = Some(Arc::new(tokenizer));
+
+    global_context.write().await.tokenizer_map.insert(model_id, arc.clone());
+    Ok(arc)
+}
+
+/// Estimate as length / 3.5: roughly 3 characters per token is reasonable for code, 4 for natural language
+fn estimate_tokens(text: &str) -> usize { 1 + text.len() * 2 / 7 }
+
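+/// Counts tokens with the model tokenizer when one is loaded; with None (e.g. a
+/// "fake" tokenizer) it falls back to estimate_tokens(), so 70 chars give 1 + 70*2/7 = 21.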
+pub fn count_text_tokens(
+    tokenizer: Option>,
+    text: &str,
+) -> Result {
+    match tokenizer {
+        Some(tokenizer) => {
+            match tokenizer.encode_fast(text, false) {
+                Ok(tokens) => Ok(tokens.len()),
+                Err(e) => Err(format!("Encoding error: {e}")),
+            }
+        }
+        None => {
+            Ok(estimate_tokens(text))
+        }
+    }
+}
+
+pub fn count_text_tokens_with_fallback(
+    tokenizer: Option>,
+    text: &str,
+) -> usize {
+    count_text_tokens(tokenizer, text).unwrap_or_else(|e| {
+        tracing::error!("{e}");
+        estimate_tokens(text)
+    })
+}
\ No newline at end of file
diff --git a/refact-agent/engine/src/tools/tool_create_memory_bank.rs b/refact-agent/engine/src/tools/tool_create_memory_bank.rs
index dad5a9713..46db232b9 100644
--- a/refact-agent/engine/src/tools/tool_create_memory_bank.rs
+++ b/refact-agent/engine/src/tools/tool_create_memory_bank.rs
@@ -14,7 +14,6 @@ use crate::{
         at_commands::AtCommandsContext,
         at_tree::{construct_tree_out_of_flat_list_of_paths, PathsHolderNodeArc},
     },
-    cached_tokenizers,
     call_validation::{ChatContent, ChatMessage, ChatUsage, ContextEnum, ContextFile, PostprocessSettings},
     files_correction::{get_project_dirs, paths_from_anywhere},
     files_in_workspace::{get_file_text_from_memory_or_disk, ls_files},
@@ -23,7 +22,7 @@ use crate::{
     subchat::subchat,
     tools::tools_description::Tool,
 };
-use crate::call_validation::ReasoningEffort;
+use crate::caps::resolve_chat_model;
 use crate::global_context::try_load_caps_quickly_if_not_present;
 
 const MAX_EXPLORATION_STEPS: usize = 1000;
@@ -263,7 +262,8 @@ async fn read_and_compress_directory(
     }
 
     let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await.map_err(|x| x.message)?;
-    let tokenizer = cached_tokenizers::cached_tokenizer(caps, gcx.clone(), model).await.map_err(|e| format!("Tokenizer error: {}", e))?;
+    let model_rec = resolve_chat_model(caps, &model)?;
+    let tokenizer = crate::tokens::cached_tokenizer(gcx.clone(), &model_rec.base).await?;
     let mut pp_settings = PostprocessSettings::new();
     pp_settings.max_files_n = context_files.len();
     let compressed = postprocess_context_files(
@@ -427,7 +427,7 @@ impl Tool for ToolCreateMemoryBank {
                     MB_EXPERT_WRAP_UP,
                     1,
                     None,
-                    Some(ReasoningEffort::High),
+                    None,
                     Some(tool_call_id.clone()),
                     Some(format!("{log_prefix}-memory-bank-dir-{}", target.target_name.replace("/", "_"))),
                     Some(false),
diff --git a/refact-agent/engine/src/tools/tool_deep_analysis.rs b/refact-agent/engine/src/tools/tool_deep_analysis.rs
index 70fe0f0a4..c833269a6 100644
--- a/refact-agent/engine/src/tools/tool_deep_analysis.rs
+++ b/refact-agent/engine/src/tools/tool_deep_analysis.rs
@@ -5,15 +5,15 @@ use serde_json::Value;
 use tokio::sync::Mutex as AMutex;
 use async_trait::async_trait;
 use axum::http::StatusCode;
+use crate::caps::resolve_chat_model;
 use crate::subchat::subchat_single;
+use crate::tokens::count_text_tokens_with_fallback;
 use crate::tools::tools_description::Tool;
 use crate::call_validation::{ChatMessage, ChatContent, ChatUsage, ContextEnum, SubchatParameters, ContextFile, PostprocessSettings};
 use crate::at_commands::at_commands::AtCommandsContext;
-use crate::cached_tokenizers;
 use crate::custom_error::ScratchError;
 use crate::global_context::try_load_caps_quickly_if_not_present;
 use crate::postprocessing::pp_context_files::postprocess_context_files;
-use crate::scratchpads::scratchpad_utils::count_tokens;
 
 pub struct ToolDeepAnalysis;
 
@@ -29,12 +29,13 @@ async fn _make_prompt(
 ) -> Result<String, String> {
     let gcx = ccx.lock().await.global_context.clone();
     let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await.map_err(|x| x.message)?;
-    let tokenizer = cached_tokenizers::cached_tokenizer(caps, gcx.clone(), subchat_params.subchat_model.to_string()).await
-        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Error loading tokenizer: {}", e))).map_err(|x| x.message)?;
+    let model_rec = resolve_chat_model(caps, &subchat_params.subchat_model)?;
+    let tokenizer = crate::tokens::cached_tokenizer(gcx.clone(), &model_rec.base).await
+        .map_err(|e| ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, e)).map_err(|x| x.message)?;
     let tokens_extra_budget = (subchat_params.subchat_n_ctx as f32 * TOKENS_EXTRA_BUDGET_PERCENT) as usize;
     let mut tokens_budget: i64 = (subchat_params.subchat_n_ctx - subchat_params.subchat_max_new_tokens - subchat_params.subchat_tokens_for_rag - tokens_extra_budget) as i64;
     let final_message = format!("***Problem:***\n{problem_statement}\n\n***Problem context:***\n");
-    tokens_budget -= count_tokens(&tokenizer.read().unwrap(), &final_message) as i64;
+    tokens_budget -= count_text_tokens_with_fallback(tokenizer.clone(), &final_message) as i64;
     let mut context = "".to_string(); 
    let mut context_files: Vec<ContextFile> = vec![];
     for message in previous_messages.iter().rev() {
@@ -62,7 +63,7 @@ async fn _make_prompt(
                 continue;
             }
         };
-        let left_tokens = tokens_budget - count_tokens(&tokenizer.read().unwrap(), &message_row) as i64;
+        let left_tokens = tokens_budget - count_text_tokens_with_fallback(tokenizer.clone(), &message_row) as i64;
         if left_tokens < 0 {
             // we do not end here, maybe there are smaller useful messages at the beginning
             continue;
@@ -180,7 +181,7 @@ impl Tool for ToolDeepAnalysis {
     }
 
     fn tool_depends_on(&self) -> Vec<String> {
-        vec![]
+        vec!["thinking".to_string()]
     }
 }
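For tracing the budget math in `_make_prompt` above: with the `deep_analysis` defaults from `customization_compiled_in.yaml` later in this diff, the numbers work out as below. `TOKENS_EXTRA_BUDGET_PERCENT` is not visible in this hunk, so the 5% figure is only an assumed value for illustration:

```rust
// Worked example of tokens_budget in _make_prompt(), self-contained.
const TOKENS_EXTRA_BUDGET_PERCENT: f32 = 0.05; // assumption; the real constant is outside this hunk

fn main() {
    // deep_analysis defaults from customization_compiled_in.yaml below
    let (n_ctx, max_new_tokens, tokens_for_rag) = (128_000usize, 32_000usize, 70_000usize);
    let tokens_extra_budget = (n_ctx as f32 * TOKENS_EXTRA_BUDGET_PERCENT) as usize; // 6_400
    let tokens_budget = (n_ctx - max_new_tokens - tokens_for_rag - tokens_extra_budget) as i64;
    assert_eq!(tokens_budget, 19_600); // what remains for the problem statement and prior messages
}
```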
 
diff --git a/refact-agent/engine/src/tools/tools_description.rs b/refact-agent/engine/src/tools/tools_description.rs
index 51888e098..8af93006d 100644
--- a/refact-agent/engine/src/tools/tools_description.rs
+++ b/refact-agent/engine/src/tools/tools_description.rs
@@ -9,6 +9,7 @@ use tokio::sync::Mutex as AMutex;
 
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::call_validation::{ChatUsage, ContextEnum};
+use crate::global_context::try_load_caps_quickly_if_not_present;
 use crate::global_context::GlobalContext;
 use crate::integrations::integr_abstract::IntegrationConfirmation;
 use crate::tools::tools_execute::{command_should_be_confirmed_by_user, command_should_be_denied};
@@ -159,6 +160,11 @@ pub async fn tools_merged_and_filtered(
     ).await;
     tools_all.extend(integrations);
 
+    let is_there_a_thinking_model = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
+        Ok(caps) => caps.chat_models.get(&caps.defaults.chat_thinking_model).is_some(),
+        Err(_) => false,
+    };
+
     let mut filtered_tools = IndexMap::new();
     for (tool_name, tool) in tools_all {
         let dependencies = tool.tool_depends_on();
@@ -168,6 +174,9 @@ pub async fn tools_merged_and_filtered(
         if dependencies.contains(&"vecdb".to_string()) && !vecdb_on {
             continue;
         }
+        if dependencies.contains(&"thinking".to_string()) && !is_there_a_thinking_model {
+            continue;
+        }
         filtered_tools.insert(tool_name, tool);
     }
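The `"thinking"` entry added here behaves like the existing `vecdb` gate (and the `ast` gate assumed from the unshown context lines): a tool is dropped from the merged list when a pseudo-dependency it declares is unsatisfied. A hypothetical condensation of the loop above:

```rust
// Hypothetical distillation of the filtering in tools_merged_and_filtered():
// a tool survives only if every pseudo-dependency it declares is satisfied.
fn keep_tool(dependencies: &[String], ast_on: bool, vecdb_on: bool, thinking_on: bool) -> bool {
    dependencies.iter().all(|dep| match dep.as_str() {
        "ast" => ast_on,
        "vecdb" => vecdb_on,
        // new in this diff: satisfied only when caps.defaults.chat_thinking_model resolves
        "thinking" => thinking_on,
        _ => true,
    })
}
```

Tools opt in by returning the dependency from `tool_depends_on()`, as `deep_analysis` now does in the previous file.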
 
@@ -532,8 +541,8 @@ fn default_param_type() -> String {
 /// TODO: Think of a better way to know if we can send array type to the model
 /// 
 /// For now, anthropic models support it, gpt models don't, for others we'll need to test
-pub fn model_supports_array_param_type(model_name: &str) -> bool {
-    model_name.starts_with("claude")
+pub fn model_supports_array_param_type(model_id: &str) -> bool {
+    model_id.contains("claude")
 }
 
 pub fn make_openai_tool_value(
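The switch from `starts_with` to `contains` matters because model ids are now provider-qualified, so "claude" is no longer guaranteed to sit at the front of the string. A quick self-contained check, with ids taken from elsewhere in this diff:

```rust
fn model_supports_array_param_type(model_id: &str) -> bool {
    model_id.contains("claude")
}

fn main() {
    // provider-prefixed ids as they appear in the new fixtures and provider configs
    assert!(model_supports_array_param_type("Refact/claude-3-7-sonnet"));
    assert!(model_supports_array_param_type("anthropic/claude-3.7-sonnet"));
    assert!(!model_supports_array_param_type("Refact/gpt-4o"));
    // starts_with("claude") would have rejected both claude ids above
}
```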
@@ -584,7 +593,7 @@ impl ToolDesc {
         if !model_supports_array_param_type(model) {
             for param in &self.parameters {
                 if param.param_type == "array" {
-                    tracing::error!("Tool {} has array parameter, but model {} does not support it", self.name, model);
+                    tracing::warn!("Tool {} has array parameter, but model {} does not support it", self.name, model);
                     return false;
                 }
             }
diff --git a/refact-agent/engine/src/tools/tools_execute.rs b/refact-agent/engine/src/tools/tools_execute.rs
index 08d5d2853..08d34d1b7 100644
--- a/refact-agent/engine/src/tools/tools_execute.rs
+++ b/refact-agent/engine/src/tools/tools_execute.rs
@@ -1,5 +1,5 @@
 use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use glob::Pattern;
 use indexmap::IndexMap;
 use tokio::sync::Mutex as AMutex;
@@ -9,7 +9,9 @@ use tracing::{info, warn};
 
 use crate::at_commands::at_commands::AtCommandsContext;
 use crate::at_commands::execute_at::MIN_RAG_CONTEXT_LIMIT;
-use crate::call_validation::{ChatMessage, ChatContent, ContextEnum, ContextFile, SubchatParameters};
+use crate::call_validation::{ChatContent, ChatMessage, ChatModelType, ContextEnum, ContextFile, SubchatParameters};
+use crate::custom_error::MapErrToString;
+use crate::global_context::try_load_caps_quickly_if_not_present;
 use crate::http::http_post_json;
 use crate::integrations::docker::docker_container_manager::docker_container_get_host_lsp_port_to_connect;
 use crate::postprocessing::pp_context_files::postprocess_context_files;
@@ -17,7 +19,7 @@ use crate::postprocessing::pp_plain_text::postprocess_plain_text;
 use crate::scratchpads::scratchpad_utils::{HasRagResults, max_tokens_for_rag_chat_by_tools};
 use crate::tools::tools_description::{MatchConfirmDenyResult, Tool};
 use crate::yaml_configs::customization_loader::load_customization;
-use crate::caps::get_model_record;
+use crate::caps::{is_cloud_model, resolve_chat_model, resolve_model};
 use crate::http::routers::v1::at_tools::{ToolExecuteResponse, ToolsExecutePost};
 
 
@@ -35,12 +37,7 @@ pub async fn unwrap_subchat_params(ccx: Arc<AMutex<AtCommandsContext>>, tool_nam
             let mut error_log = Vec::new();
             let tconfig = load_customization(gcx.clone(), true, &mut error_log).await;
             for e in error_log.iter() {
-                tracing::error!(
-                    "{}:{} {:?}",
-                    crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-                    e.error_line,
-                    e.error_msg,
-                );
+                tracing::error!("{e}");
             }
             tconfig.subchat_tool_parameters.get(tool_name).cloned()
                 .ok_or_else(|| format!("subchat params for tool {} not found (checked in Post and in Customization)", tool_name))?
@@ -48,20 +45,47 @@ pub async fn unwrap_subchat_params(ccx: Arc>, tool_nam
     };
 
     // check if the models exist otherwise use the external chat model
-    match get_model_record(gcx, &params.subchat_model).await {
-        Ok(_) => {}
-        Err(err) => {
-            let current_model = ccx.lock().await.current_model.clone();
-            warn!("subchat_model {} is not available: {}. Using {} model as a fallback", params.subchat_model, err, current_model);
-            params.subchat_model = current_model;
+    let caps = try_load_caps_quickly_if_not_present(gcx.clone(), 0).await.map_err_to_string()?;
+
+    if !params.subchat_model.is_empty() {
+        match resolve_chat_model(caps.clone(), &params.subchat_model) {
+            Ok(_) => return Ok(params),
+            Err(e) => {
+                tracing::warn!("Specified subchat_model {} is not available: {}", params.subchat_model, e);
+            }
         }
     }
+
+    let current_model = ccx.lock().await.current_model.clone();
+    let model_to_resolve = match params.subchat_model_type {
+        ChatModelType::Light => &caps.defaults.chat_light_model,
+        ChatModelType::Default => &caps.defaults.chat_default_model,
+        ChatModelType::Thinking => &caps.defaults.chat_thinking_model,
+    };
+
+    params.subchat_model = match resolve_model(&caps.chat_models, model_to_resolve) {
+        Ok(model_rec) => {
+            if !is_cloud_model(&current_model) && is_cloud_model(&model_rec.base.id)
+                && params.subchat_model_type != ChatModelType::Light {
+                current_model.to_string()
+            } else {
+                model_rec.base.id.clone()
+            }
+        },
+        Err(e) => {
+            tracing::warn!("{:?} model is not available: {}. Using {} model as a fallback.", 
+                params.subchat_model_type, e, current_model);
+            current_model
+        }
+    };
+
+    tracing::info!("using model for subchat: {}", params.subchat_model);
     Ok(params)
 }
 
 pub async fn run_tools_remotely(
     ccx: Arc<AMutex<AtCommandsContext>>,
-    model_name: &str,
+    model_id: &str,
     maxgen: usize,
     original_messages: &[ChatMessage],
     stream_back_to_user: &mut HasRagResults,
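To summarize the new ladder in `unwrap_subchat_params` above: an explicit `subchat_model` wins if it resolves; otherwise the caps default for the requested `subchat_model_type` is used, except that a non-cloud current model is not silently swapped for a cloud default (with `Light` as the exception); if even the default fails to resolve, the chat's current model is kept. A hypothetical, self-contained distillation of that decision:

```rust
#[derive(PartialEq)]
enum ChatModelType { Light, Default, Thinking } // mirrors call_validation::ChatModelType

// Hypothetical condensation: the `is_cloud` booleans stand in for is_cloud_model(),
// and resolution failures are modeled as None.
fn pick_subchat_model(
    explicit: Option<String>,             // params.subchat_model, if it resolved
    type_default: Option<(String, bool)>, // caps default for the type, plus its is_cloud flag
    current: (String, bool),              // ccx.current_model, plus its is_cloud flag
    model_type: ChatModelType,
) -> String {
    if let Some(model) = explicit {
        return model;
    }
    match type_default {
        Some((model, cloud)) if current.1 || !cloud || model_type == ChatModelType::Light => model,
        _ => current.0, // default unavailable, or it would upgrade a local chat to the cloud
    }
}
```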
@@ -87,7 +111,7 @@ pub async fn run_tools_remotely(
         maxgen,
         subchat_tool_parameters,
         postprocess_parameters,
-        model_name: model_name.to_string(),
+        model_name: model_id.to_string(),
         chat_id,
         style: style.clone(),
     };
@@ -109,7 +133,7 @@ pub async fn run_tools_remotely(
 pub async fn run_tools_locally(
     ccx: Arc<AMutex<AtCommandsContext>>,
     tools: &mut IndexMap<String, Box<dyn Tool + Send>>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     maxgen: usize,
     original_messages: &Vec<ChatMessage>,
     stream_back_to_user: &mut HasRagResults,
@@ -131,7 +155,7 @@ pub async fn run_tools_locally(
 pub async fn run_tools(
     ccx: Arc<AMutex<AtCommandsContext>>,
     tools: &mut IndexMap<String, Box<dyn Tool + Send>>,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     maxgen: usize,
     original_messages: &Vec<ChatMessage>,
     style: &Option<String>,
@@ -276,7 +300,7 @@ async fn pp_run_tools(
     generated_other: Vec<ChatMessage>,
     context_files_for_pp: &mut Vec<ContextFile>,
     tokens_for_rag: usize,
-    tokenizer: Arc<RwLock<Tokenizer>>,
+    tokenizer: Option<Arc<Tokenizer>>,
     style: &Option<String>,
 ) -> (Vec<ChatMessage>, Vec<ChatMessage>) {
     let mut generated_tool = generated_tool.to_vec();
diff --git a/refact-agent/engine/src/vecdb/vdb_file_splitter.rs b/refact-agent/engine/src/vecdb/vdb_file_splitter.rs
index 8a46b771c..a62154ec9 100644
--- a/refact-agent/engine/src/vecdb/vdb_file_splitter.rs
+++ b/refact-agent/engine/src/vecdb/vdb_file_splitter.rs
@@ -1,14 +1,13 @@
 use std::sync::Arc;
-use std::sync::RwLock as StdRwLock;
 
 use tokenizers::Tokenizer;
 use tokio::sync::RwLock as ARwLock;
 
 use crate::ast::chunk_utils::get_chunks;
-use crate::ast::count_tokens;
 use crate::ast::file_splitter::LINES_OVERLAP;
 use crate::files_in_workspace::Document;
 use crate::global_context::GlobalContext;
+use crate::tokens::count_text_tokens_with_fallback;
 use crate::vecdb::vdb_structs::SplitResult;
 
 pub struct FileSplitter {
@@ -24,7 +23,7 @@ impl FileSplitter {
     }
 
     pub async fn vectorization_split(&self, doc: &Document,
-                                     tokenizer: Option<Arc<StdRwLock<Tokenizer>>>,
+                                     tokenizer: Option<Arc<Tokenizer>>,
                                      tokens_limit: usize,
                                      global_context: Arc<ARwLock<GlobalContext>>
     ) -> Result<Vec<SplitResult>, String> {
@@ -41,7 +40,7 @@ impl FileSplitter {
         let mut top_row: i32 = -1;
         let lines = text.split('\n').collect::<Vec<_>>();
         for (line_idx, line) in lines.iter().enumerate() {
-            let text_orig_tok_n = count_tokens(tokenizer.clone(), line);
+            let text_orig_tok_n = count_text_tokens_with_fallback(tokenizer.clone(), line);
             if top_row == -1 && text_orig_tok_n != 0 { // top lines are empty
                 top_row = line_idx as i32;
             }
diff --git a/refact-agent/engine/src/vecdb/vdb_highlev.rs b/refact-agent/engine/src/vecdb/vdb_highlev.rs
index c57c79b6a..bbb11b4b0 100644
--- a/refact-agent/engine/src/vecdb/vdb_highlev.rs
+++ b/refact-agent/engine/src/vecdb/vdb_highlev.rs
@@ -7,7 +7,6 @@ use async_trait::async_trait;
 use tracing::{error, info};
 
 use crate::background_tasks::BackgroundTasksHolder;
-use crate::caps::get_custom_embedding_api_key;
 use crate::fetch_embedding;
 use crate::global_context::{CommandLine, GlobalContext};
 use crate::knowledge::{MemdbSubEvent, MemoriesDatabase};
@@ -17,15 +16,6 @@ use crate::vecdb::vdb_structs::{MemoRecord, MemoSearchResult, SearchResult, VecD
 use crate::vecdb::vdb_thread::{vecdb_start_background_tasks, vectorizer_enqueue_dirty_memory, vectorizer_enqueue_files, FileVectorizerService};
 
 
-fn model_to_rejection_threshold(embedding_model: &str) -> f32 {
-    match embedding_model {
-        "text-embedding-3-small" => 0.63,
-        "thenlper_gte" => 0.25,
-        _ => 0.63,
-    }
-}
-
-
 pub struct VecDb {
     pub memdb: Arc<AMutex<MemoriesDatabase>>,
     vecdb_emb_client: Arc<AMutex<reqwest::Client>>,
@@ -49,24 +39,10 @@ async fn do_i_need_to_reload_vecdb(
 
     let vecdb_max_files = gcx.read().await.cmdline.vecdb_max_files;
     let mut consts = {
-        let caps_locked = caps.read().unwrap();
-        let mut b = caps_locked.embedding_batch;
-        if b == 0 {
-            b = 64;
-        }
-        if b > 256 {
-            tracing::warn!("embedding_batch can't be higher than 256");
-            b = 64;
-        }
         VecdbConstants {
-            embedding_model: caps_locked.embedding_model.clone(),
-            embedding_size: caps_locked.embedding_size,
-            embedding_batch: b,
-            vectorizer_n_ctx: caps_locked.embedding_n_ctx,
+            embedding_model: caps.embedding_model.clone(),
             tokenizer: None,
-            endpoint_embeddings_template: caps_locked.endpoint_embeddings_template.clone(),
-            endpoint_embeddings_style: caps_locked.endpoint_embeddings_style.clone(),
-            splitter_window_size: caps_locked.embedding_n_ctx / 2,
+            splitter_window_size: caps.embedding_model.base.n_ctx / 2,
             vecdb_max_files: vecdb_max_files,
         }
     };
@@ -77,30 +53,29 @@ async fn do_i_need_to_reload_vecdb(
         Some(ref db) => {
             if
                 db.constants.embedding_model == consts.embedding_model &&
-                db.constants.endpoint_embeddings_template == consts.endpoint_embeddings_template &&
-                db.constants.endpoint_embeddings_style == consts.endpoint_embeddings_style &&
-                db.constants.splitter_window_size == consts.splitter_window_size &&
-                db.constants.embedding_batch == consts.embedding_batch &&
-                db.constants.embedding_size == consts.embedding_size
+                db.constants.splitter_window_size == consts.splitter_window_size
             {
                 return (false, None);
             }
         }
     }
 
-    if consts.embedding_model.is_empty() || consts.endpoint_embeddings_template.is_empty() {
-        error!("command line says to launch vecdb, but this will not happen: embedding_model.is_empty() || endpoint_embeddings_template.is_empty()");
+    if consts.embedding_model.base.name.is_empty() || consts.embedding_model.base.endpoint.is_empty() {
+        error!("command line says to launch vecdb, but this will not happen: embedding model name or endpoint is empty");
         return (true, None);
     }
 
-    let tokenizer_maybe = crate::cached_tokenizers::cached_tokenizer(
-        caps.clone(), gcx.clone(), consts.embedding_model.clone()).await;
-    if tokenizer_maybe.is_err() {
-        error!("vecdb launch failed, embedding model tokenizer didn't load: {}", tokenizer_maybe.unwrap_err());
-        return (false, None);
-    }
-    consts.tokenizer = Some(tokenizer_maybe.clone().unwrap());
-
+    let tokenizer_result = crate::tokens::cached_tokenizer(
+        gcx.clone(), &consts.embedding_model.base,
+    ).await;
+    
+    consts.tokenizer = match tokenizer_result {
+        Ok(tokenizer) => tokenizer,
+        Err(err) => {
+            error!("vecdb launch failed, embedding model tokenizer didn't load: {}", err);
+            return (false, None);
+        }
+    };
     return (true, Some(consts));
 }
 
@@ -166,17 +141,15 @@ impl VecDb {
         config_dir: &PathBuf,
         cmdline: CommandLine,
         constants: VecdbConstants,
-        api_key: &String
     ) -> Result<VecDb, String> {
         let emb_table_name = crate::vecdb::vdb_emb_aux::create_emb_table_name(&vec![cmdline.workspace_folder]);
-        let handler = VecDBSqlite::init(cache_dir, &constants.embedding_model, constants.embedding_size, &emb_table_name).await?;
+        let handler = VecDBSqlite::init(cache_dir, &constants.embedding_model.base.name, constants.embedding_model.embedding_size, &emb_table_name).await?;
         let vecdb_handler = Arc::new(AMutex::new(handler));
         let memdb = Arc::new(AMutex::new(MemoriesDatabase::init(config_dir, &constants, &emb_table_name, cmdline.reset_memory).await?));
 
         let vectorizer_service = Arc::new(AMutex::new(FileVectorizerService::new(
             vecdb_handler.clone(),
             constants.clone(),
-            api_key.clone(),
             memdb.clone(),
         ).await));
 
@@ -423,18 +396,10 @@ pub async fn memories_search(
         )
     };
 
-    let api_key = get_custom_embedding_api_key(gcx.clone()).await;
-    if let Err(err) = api_key {
-        return Err(err.message);
-    }
-
-    let embedding = fetch_embedding::get_embedding_with_retry(
+    let embedding = fetch_embedding::get_embedding_with_retries(
         vecdb_emb_client,
-        &constants.endpoint_embeddings_style,
         &constants.embedding_model,
-        &constants.endpoint_embeddings_template,
         vec![query.clone()],
-        &api_key.unwrap(),
         5,
     ).await?;
     if embedding.is_empty() {
@@ -452,10 +417,9 @@ pub async fn memories_search(
         score_a.partial_cmp(&score_b).unwrap_or(std::cmp::Ordering::Equal)
     });
 
-    let rejection_threshold = model_to_rejection_threshold(constants.embedding_model.as_str());
     let mut filtered_results = Vec::new();
     for rec in results.iter() {
-        if rec.distance.abs() >= rejection_threshold {
+        if rec.distance.abs() >= constants.embedding_model.rejection_threshold {
             info!("distance {:.3} -> dropped memory {}", rec.distance, rec.memid);
         } else {
             info!("distance {:.3} -> kept memory {}", rec.distance, rec.memid);
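The per-model `rejection_threshold` used here replaces the hardcoded name-to-threshold map deleted at the top of this file. A self-contained sketch of the filtering predicate, reusing the old 0.63 default as the example value:

```rust
// Stand-in for the relevant slice of caps::EmbeddingModelRecord.
struct EmbeddingModelLite { rejection_threshold: f32 }

// Same predicate as memories_search() and vecdb_search() below: a hit is kept
// only while its distance stays strictly below the model's threshold.
fn keep_hit(distance: f32, model: &EmbeddingModelLite) -> bool {
    distance.abs() < model.rejection_threshold
}

fn main() {
    let model = EmbeddingModelLite { rejection_threshold: 0.63 }; // old default, e.g. text-embedding-3-small
    assert!(keep_hit(0.25, &model));
    assert!(!keep_hit(0.70, &model));
}
```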
@@ -512,17 +476,13 @@ impl VecdbSearch for VecDb {
         query: String,
         top_n: usize,
         vecdb_scope_filter_mb: Option<String>,
-        api_key: &String,
     ) -> Result {
         // TODO: move out of struct, replace self with Arc
         let t0 = std::time::Instant::now();
-        let embedding_mb = fetch_embedding::get_embedding_with_retry(
+        let embedding_mb = fetch_embedding::get_embedding_with_retries(
             self.vecdb_emb_client.clone(),
-            &self.constants.endpoint_embeddings_style,
             &self.constants.embedding_model,
-            &self.constants.endpoint_embeddings_template,
             vec![query.clone()],
-            api_key,
             5,
         ).await;
         if embedding_mb.is_err() {
@@ -542,7 +502,7 @@ impl VecdbSearch for VecDb {
         info!("search itself {:.3}s", t1.elapsed().as_secs_f64());
         let mut dist0 = 0.0;
         let mut filtered_results = Vec::new();
-        let rejection_threshold = model_to_rejection_threshold(self.constants.embedding_model.as_str());
+        let rejection_threshold = self.constants.embedding_model.rejection_threshold;
         info!("rejection_threshold {:.3}", rejection_threshold);
         for rec in results.iter_mut() {
             if dist0 == 0.0 {
diff --git a/refact-agent/engine/src/vecdb/vdb_init.rs b/refact-agent/engine/src/vecdb/vdb_init.rs
index 4d304bde5..454a0b0fa 100644
--- a/refact-agent/engine/src/vecdb/vdb_init.rs
+++ b/refact-agent/engine/src/vecdb/vdb_init.rs
@@ -5,7 +5,6 @@ use tokio::sync::Mutex as AMutex;
 use tokio::time::sleep;
 use tracing::{debug, error, info, warn};
 
-use crate::caps::get_custom_embedding_api_key;
 use crate::global_context::{CommandLine, GlobalContext};
 use crate::vecdb::vdb_highlev::VecDb;
 use crate::vecdb::vdb_structs::{VecdbConstants, VecdbSearch};
@@ -34,7 +33,6 @@ impl Default for VecDbInitConfig {
 
 #[derive(Debug)]
 pub enum VecDbInitError {
-    ApiKeyError(String),
     InitializationError(String),
     TestSearchError(String),
 }
@@ -42,7 +40,6 @@ pub enum VecDbInitError {
 impl std::fmt::Display for VecDbInitError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            VecDbInitError::ApiKeyError(msg) => write!(f, "API key error: {}", msg),
             VecDbInitError::InitializationError(msg) => write!(f, "Initialization error: {}", msg),
             VecDbInitError::TestSearchError(msg) => write!(f, "Test search error: {}", msg),
         }
@@ -54,7 +51,6 @@ pub async fn init_vecdb_fail_safe(
     config_dir: &PathBuf,
     cmdline: CommandLine,
     constants: VecdbConstants,
-    api_key: &String,
     init_config: VecDbInitConfig,
) -> Result<VecDb, VecDbInitError> {
     let mut attempt: usize = 0;
@@ -64,12 +60,12 @@ pub async fn init_vecdb_fail_safe(
         attempt += 1;
         info!("VecDb init attempt {}/{}", attempt, init_config.max_attempts);
         
-        match VecDb::init(cache_dir, config_dir, cmdline.clone(), constants.clone(), api_key).await {
+        match VecDb::init(cache_dir, config_dir, cmdline.clone(), constants.clone()).await {
             Ok(vecdb) => {
                 info!("Successfully initialized VecDb on attempt {}", attempt);
                 
                 if init_config.test_search_after_init {
-                    match vecdb_test_search(&vecdb, api_key).await {
+                    match vecdb_test_search(&vecdb).await {
                         Ok(_) => {
                             info!("VecDb test search successful");
                             return Ok(vecdb);
@@ -104,12 +100,12 @@ pub async fn init_vecdb_fail_safe(
     }
 }
 
-async fn vecdb_test_search(vecdb: &VecDb, api_key: &String) -> Result<(), String> {
+async fn vecdb_test_search(vecdb: &VecDb) -> Result<(), String> {
     let test_query = "test query".to_string();
     let top_n = 3;
     let filter = None;
     
-    match VecdbSearch::vecdb_search(vecdb, test_query, top_n, filter, api_key).await {
+    match VecdbSearch::vecdb_search(vecdb, test_query, top_n, filter).await {
         Ok(_) => Ok(()),
         Err(e) => Err(format!("Test search failed: {}", e)),
     }
@@ -120,10 +116,6 @@ pub async fn initialize_vecdb_with_context(
     constants: VecdbConstants,
     init_config: Option<VecDbInitConfig>,
 ) -> Result<(), VecDbInitError> {
-    let api_key = match get_custom_embedding_api_key(gcx.clone()).await {
-        Ok(key) => key,
-        Err(err) => return Err(VecDbInitError::ApiKeyError(err.message)),
-    };
     
     let (cache_dir, config_dir, cmdline) = {
         let gcx_locked = gcx.read().await;
@@ -141,7 +133,6 @@ pub async fn initialize_vecdb_with_context(
         &base_dir_config,
         cmdline.clone(),
         constants,
-        &api_key,
         config,
     ).await?;
     
diff --git a/refact-agent/engine/src/vecdb/vdb_remote.rs b/refact-agent/engine/src/vecdb/vdb_remote.rs
index dc2260fce..930eb828e 100644
--- a/refact-agent/engine/src/vecdb/vdb_remote.rs
+++ b/refact-agent/engine/src/vecdb/vdb_remote.rs
@@ -17,7 +17,6 @@ impl VecdbSearch for VecDbRemote {
         query: String,
         top_n: usize,
         _vecdb_scope_filter_mb: Option<String>,
-        _api_key: &String,
     ) -> Result<SearchResult, String> {
         // NOTE: if you're going to use https make sure that you set insecure flag from cmdline
         let url = "http://127.0.0.1:8008/v1/vdb-search".to_string();
diff --git a/refact-agent/engine/src/vecdb/vdb_structs.rs b/refact-agent/engine/src/vecdb/vdb_structs.rs
index de29dd37e..6a06a0cde 100644
--- a/refact-agent/engine/src/vecdb/vdb_structs.rs
+++ b/refact-agent/engine/src/vecdb/vdb_structs.rs
@@ -1,12 +1,13 @@
 use std::fmt::Debug;
 use std::path::PathBuf;
-use std::sync::RwLock as StdRwLock;
 use std::sync::Arc;
 use serde::{Deserialize, Serialize};
 use indexmap::IndexMap;
 use tokenizers::Tokenizer;
 use async_trait::async_trait;
 
+use crate::caps::EmbeddingModelRecord;
+
 
 #[async_trait]
 pub trait VecdbSearch: Send {
@@ -15,20 +16,14 @@ pub trait VecdbSearch: Send {
         query: String,
         top_n: usize,
         filter_mb: Option<String>,
-        api_key: &String,
     ) -> Result<SearchResult, String>;
 }
 
 #[derive(Debug, Clone)]
 pub struct VecdbConstants {
     // constant in a sense it cannot be changed without creating a new db
-    pub embedding_model: String,
-    pub embedding_size: i32,
-    pub embedding_batch: usize,
-    pub tokenizer: Option<Arc<StdRwLock<Tokenizer>>>,
-    pub vectorizer_n_ctx: usize,
-    pub endpoint_embeddings_template: String,
-    pub endpoint_embeddings_style: String,
+    pub embedding_model: EmbeddingModelRecord,
+    pub tokenizer: Option<Arc<Tokenizer>>,
     pub splitter_window_size: usize,
     pub vecdb_max_files: usize,
 }
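With `EmbeddingModelRecord` embedded in `VecdbConstants`, the reload check in `vdb_highlev.rs` collapses five field comparisons into one record equality plus the derived splitter window (`n_ctx / 2` per this diff). A self-contained sketch, assuming the record derives `PartialEq` (which the `==` in `do_i_need_to_reload_vecdb` requires):

```rust
#[derive(PartialEq)]
struct EmbeddingModelLite { id: String, n_ctx: usize } // stand-in for EmbeddingModelRecord

struct ConstantsLite {
    embedding_model: EmbeddingModelLite,
    splitter_window_size: usize, // embedding_model.base.n_ctx / 2
}

// The vector db is rebuilt only when the model record itself or the derived
// splitter window changes; batch size, endpoint style and embedding size now
// travel inside the record instead of being compared field by field.
fn needs_reload(old: &ConstantsLite, new: &ConstantsLite) -> bool {
    old.embedding_model != new.embedding_model
        || old.splitter_window_size != new.splitter_window_size
}
```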
diff --git a/refact-agent/engine/src/vecdb/vdb_thread.rs b/refact-agent/engine/src/vecdb/vdb_thread.rs
index b41d47e0b..9fb5492f1 100644
--- a/refact-agent/engine/src/vecdb/vdb_thread.rs
+++ b/refact-agent/engine/src/vecdb/vdb_thread.rs
@@ -11,7 +11,7 @@ use tokio::task::JoinHandle;
 use tracing::{info, warn};
 
 use crate::ast::file_splitter::AstBasedFileSplitter;
-use crate::fetch_embedding::get_embedding_with_retry;
+use crate::fetch_embedding::get_embedding_with_retries;
 use crate::files_in_workspace::{is_path_to_enqueue_valid, Document};
 use crate::global_context::GlobalContext;
 use crate::knowledge::{vectorize_dirty_memories, MemoriesDatabase};
@@ -33,7 +33,6 @@ pub struct FileVectorizerService {
     pub vstatus: Arc<AMutex<VecDbStatus>>,
     pub vstatus_notify: Arc<ANotify>,   // fun stuff https://docs.rs/tokio/latest/tokio/sync/struct.Notify.html
     constants: VecdbConstants,
-    api_key: String,
     memdb: Arc<AMutex<MemoriesDatabase>>,
     vecdb_todo: Arc>>,
 }
@@ -44,21 +43,17 @@ async fn vectorize_batch_from_q(
     vstatus: Arc<AMutex<VecDbStatus>>,
     client: Arc<AMutex<reqwest::Client>>,
     constants: &VecdbConstants,
-    api_key: &String,
     vecdb_handler_arc: Arc<AMutex<VecDBSqlite>>,
-    #[allow(non_snake_case)]
-    B: usize,
 ) -> Result<(), String> {
+    #[allow(non_snake_case)]
+    let B = constants.embedding_model.embedding_batch;
     let batch = run_actual_model_on_these.drain(..B.min(run_actual_model_on_these.len())).collect::<Vec<_>>();
     assert!(batch.len() > 0);
 
-    let batch_result = match get_embedding_with_retry(
+    let batch_result = match get_embedding_with_retries(
         client.clone(),
-        &constants.endpoint_embeddings_style.clone(),
-        &constants.embedding_model.clone(),
-        &constants.endpoint_embeddings_template.clone(),
+        &constants.embedding_model,
         batch.iter().map(|x| x.window_text.clone()).collect(),
-        api_key,
         10,
     ).await {
         Ok(res) => res,
@@ -170,7 +165,6 @@ async fn vectorize_thread(
         vecdb_handler_arc,
         vstatus,
         vstatus_notify,
-        api_key
     ) = {
         let vservice_locked = vservice.lock().await;
         (
@@ -180,7 +174,6 @@ async fn vectorize_thread(
             vservice_locked.vecdb_handler.clone(),
             vservice_locked.vstatus.clone(),
             vservice_locked.vstatus_notify.clone(),
-            vservice_locked.api_key.clone()
         )
     };
 
@@ -238,7 +231,7 @@ async fn vectorize_thread(
         loop {
             if
             run_actual_model_on_these.len() > 0 && flush ||
-                run_actual_model_on_these.len() >= constants.embedding_batch
+                run_actual_model_on_these.len() >= constants.embedding_model.embedding_batch
             {
                 if let Err(err) = vectorize_batch_from_q(
                     &mut run_actual_model_on_these,
@@ -246,9 +239,7 @@ async fn vectorize_thread(
                     vstatus.clone(),
                     client.clone(),
                     &constants,
-                    &api_key,
                     vecdb_handler_arc.clone(),
-                    constants.embedding_batch,
                 ).await {
                     tracing::error!("{}", err);
                     continue;
@@ -281,8 +272,7 @@ async fn vectorize_thread(
                         vecdb_handler_arc.clone(),
                         vstatus.clone(),
                         client.clone(),
-                        &api_key,
-                        constants.embedding_batch,
+                        constants.embedding_model.embedding_batch,
                     ).await;
                     info!("/MEMDB {:?}", r);
                     continue;
@@ -353,7 +343,7 @@ async fn vectorize_thread(
         }
 
         let file_splitter = AstBasedFileSplitter::new(constants.splitter_window_size);
-        let mut splits = file_splitter.vectorization_split(&doc, None, gcx.clone(), constants.vectorizer_n_ctx).await.unwrap_or_else(|err| {
+        let mut splits = file_splitter.vectorization_split(&doc, None, gcx.clone(), constants.embedding_model.base.n_ctx).await.unwrap_or_else(|err| {
             info!("{}", err);
             vec![]
         });
@@ -424,7 +414,6 @@ impl FileVectorizerService {
     pub async fn new(
         vecdb_handler: Arc<AMutex<VecDBSqlite>>,
         constants: VecdbConstants,
-        api_key: String,
         memdb: Arc<AMutex<MemoriesDatabase>>,
     ) -> Self {
         let vstatus = Arc::new(AMutex::new(
@@ -446,7 +435,6 @@ impl FileVectorizerService {
             vstatus: vstatus.clone(),
             vstatus_notify: Arc::new(ANotify::new()),
             constants,
-            api_key,
             memdb,
             vecdb_todo: Default::default(),
         }
diff --git a/refact-agent/engine/src/yaml_configs/create_configs.rs b/refact-agent/engine/src/yaml_configs/create_configs.rs
index ac09b9853..38e13e75e 100644
--- a/refact-agent/engine/src/yaml_configs/create_configs.rs
+++ b/refact-agent/engine/src/yaml_configs/create_configs.rs
@@ -17,14 +17,18 @@ pub async fn yaml_configs_try_create_all(gcx: Arc<ARwLock<GlobalContext>>) -> St
     let mut results = Vec::new();
     let config_dir = gcx.read().await.config_dir.clone();
 
-    let integrations_d = config_dir.join("integrations.d");
-    if let Err(e) = tokio::fs::create_dir_all(&integrations_d).await {
-        tracing::warn!("Failed to create directory {:?}: {}", integrations_d, e);
-        results.push(format!("Error creating directory {:?}: {}", integrations_d, e));
+    let dirs_to_create = [
+        config_dir.join("integrations.d"),
+        config_dir.join("providers.d"),
+    ];
+    for dir in dirs_to_create {
+        if let Err(e) = tokio::fs::create_dir_all(&dir).await {
+            tracing::warn!("Failed to create directory {:?}: {}", dir, e);
+            results.push(format!("Error creating directory {:?}: {}", dir, e));
+        }
     }
 
     let files = vec![
-        ("bring-your-own-key.yaml", crate::caps::BRING_YOUR_OWN_KEY_SAMPLE),
         ("customization.yaml", include_str!("default_customization.yaml")),
         ("privacy.yaml", include_str!("default_privacy.yaml")),
         ("indexing.yaml", include_str!("default_indexing.yaml")),
@@ -33,7 +37,7 @@ pub async fn yaml_configs_try_create_all(gcx: Arc<ARwLock<GlobalContext>>) -> St
 
     for (file_name, content) in files {
         let file_path = if file_name == "integrations.d/shell.yaml" {
-            integrations_d.join("shell.yaml")
+            config_dir.join("integrations.d").join("shell.yaml")
         } else {
             config_dir.join(file_name)
         };
diff --git a/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml b/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml
index 63186b55a..91a573b3f 100644
--- a/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml
+++ b/refact-agent/engine/src/yaml_configs/customization_compiled_in.yaml
@@ -75,7 +75,7 @@ PROMPT_AGENTIC_TOOLS: |
       - You can also use `locate()` with the task description to find all necessary files automatically.
       - Use `cat("filename")` to look inside the most important files without compression.
     - Check any files that might indirectly relate to the task.
-    - Running available validation tools preliminary - is a good idea. 
+    - Running available validation tools beforehand is a good idea.
   **Step 3: Make a Clear Plan**
     - Goal: Create a clear, validated plan before making changes.
     - After gathering context (Step 2), create your plan independently.
@@ -111,7 +111,7 @@ PROMPT_AGENTIC_TOOLS: |
   %WORKSPACE_INFO%
 
   %PROJECT_SUMMARY%
-  
+
   **Always test your solutions!**
   **Clearly comment before each action.**
   **Document results carefully.**
@@ -235,23 +235,23 @@ system_prompts:
 
 subchat_tool_parameters:
   locate:
-    subchat_model: "gpt-4o-mini"
+    subchat_model_type: "light"
     subchat_tokens_for_rag: 30000
     subchat_n_ctx: 32000
     subchat_max_new_tokens: 8000
   locate_search:
-    subchat_model: "gpt-4o-mini"
+    subchat_model_type: "light"
     subchat_tokens_for_rag: 10000
     subchat_n_ctx: 16000
     subchat_max_new_tokens: 2000
   deep_analysis:
-    subchat_model: "o3-mini"
+    subchat_model_type: "thinking"
     subchat_tokens_for_rag: 70000
     subchat_n_ctx: 128000
     subchat_max_new_tokens: 32000
     subchat_reasoning_effort: "high"
   create_memory_bank:
-    subchat_model: "o3-mini"
+    subchat_model_type: "default"
     subchat_tokens_for_rag: 88000
     subchat_n_ctx: 128000
     subchat_max_new_tokens: 32000
diff --git a/refact-agent/engine/src/yaml_configs/customization_loader.rs b/refact-agent/engine/src/yaml_configs/customization_loader.rs
index 644c4c49a..48c2efbb4 100644
--- a/refact-agent/engine/src/yaml_configs/customization_loader.rs
+++ b/refact-agent/engine/src/yaml_configs/customization_loader.rs
@@ -7,7 +7,7 @@ use tokio::sync::RwLock as ARwLock;
 
 use crate::call_validation::{ChatMessage, SubchatParameters};
 use crate::global_context::{GlobalContext, try_load_caps_quickly_if_not_present};
-use crate::integrations::setting_up_integrations::YamlError;
+use crate::custom_error::YamlError;
 
 
 #[derive(Debug, Serialize, Deserialize, Default)]
@@ -131,7 +131,7 @@ pub fn load_and_mix_with_users_config(
     let user_unstructured: serde_yaml::Value = serde_yaml::from_str(user_yaml)
         .map_err(|e| {
             error_log.push(YamlError {
-                integr_config_path: "customization.yaml".to_string(),
+                path: "customization.yaml".to_string(),
                 error_line: 0,
                 error_msg: e.to_string(),
             });
@@ -146,7 +146,7 @@ pub fn load_and_mix_with_users_config(
     let mut user_config: CustomizationYaml = serde_yaml::from_str(user_yaml)
         .map_err(|e| {
             error_log.push(YamlError {
-                integr_config_path: "customization.yaml".to_string(),
+                path: "customization.yaml".to_string(),
                 error_line: 0,
                 error_msg: e.to_string(),
             });
@@ -155,7 +155,7 @@ pub fn load_and_mix_with_users_config(
     let caps_config: CustomizationYaml = serde_yaml::from_str(caps_yaml)
         .map_err(|e| {
             error_log.push(YamlError {
-                integr_config_path: "caps.yaml".to_string(),
+                path: "caps.yaml".to_string(),
                 error_line: 0,
                 error_msg: e.to_string(),
             });
@@ -207,8 +207,9 @@ pub async fn load_customization(
     let caps = match try_load_caps_quickly_if_not_present(gcx.clone(), 0).await {
         Ok(caps) => caps,
         Err(e) => {
+            let address_url = gcx.read().await.cmdline.address_url.clone();
             error_log.push(YamlError {
-                integr_config_path: "bring-your-own-key.yaml".to_string(),
+                path: address_url,
                 error_line: 0,
                 error_msg: format!("error loading caps: {e}"),
             });
@@ -216,11 +217,6 @@ pub async fn load_customization(
         }
     };
 
-    let caps_config_text = {
-        let caps_locked = caps.read().unwrap();
-        caps_locked.customization.clone()
-    };
-
     let config_dir = gcx.read().await.config_dir.clone();
     let customization_yaml_path = config_dir.join("customization.yaml");
     let user_config_text = std::fs::read_to_string(&customization_yaml_path)
@@ -229,7 +225,7 @@ pub async fn load_customization(
 
     load_and_mix_with_users_config(
         &user_config_text,
-        &caps_config_text,
+        &caps.customization,
         skip_visibility_filtering,
         allow_experimental,
         error_log,
@@ -247,12 +243,7 @@ mod tests {
             "", "", true, true, &mut error_log,
         );
         for e in error_log.iter() {
-            eprintln!(
-                "{}:{} {:?}",
-                crate::nicer_logs::last_n_chars(&e.integr_config_path, 30),
-                e.error_line,
-                e.error_msg,
-            );
+            eprintln!("{e}");
         }
         assert!(error_log.is_empty(), "There were errors in the error_log");
         assert_eq!(config.system_prompts.get("default").is_some(), true);
diff --git a/refact-agent/engine/src/yaml_configs/default_privacy.yaml b/refact-agent/engine/src/yaml_configs/default_privacy.yaml
index 404b0d927..9602b494f 100644
--- a/refact-agent/engine/src/yaml_configs/default_privacy.yaml
+++ b/refact-agent/engine/src/yaml_configs/default_privacy.yaml
@@ -13,7 +13,7 @@ privacy_rules:
     - "*/secret_project2/*.txt"
     - "*.pem"
 
-  only_send_to_servers_I_control:       # You can set up which ones you control in bring-your-own-key.yaml, otherwise you control none
+  only_send_to_servers_I_control:       # You can set up which providers you control in ~/.config/refact/providers.d/*.yaml, otherwise you control none
     - "secret_passwords.txt"
 
 
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/anthropic.yaml b/refact-agent/engine/src/yaml_configs/default_providers/anthropic.yaml
new file mode 100644
index 000000000..83702cddc
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/anthropic.yaml
@@ -0,0 +1,27 @@
+chat_endpoint: https://api.anthropic.com/v1/chat/completions
+supports_completion: false
+
+api_key: sk-ant-...
+
+chat_models:
+  claude-3-7-sonnet-latest:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_clicks: true
+    supports_agent: true
+    supports_reasoning: anthropic
+    tokenizer: hf://Xenova/claude-tokenizer
+
+running_models:
+  - claude-3-7-sonnet-latest
+  - claude-3-5-sonnet-latest
+  - claude-3-5-haiku-latest
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_agent: true
+    tokenizer: hf://Xenova/claude-tokenizer
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/custom.yaml b/refact-agent/engine/src/yaml_configs/default_providers/custom.yaml
new file mode 100644
index 000000000..50e489680
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/custom.yaml
@@ -0,0 +1,7 @@
+supports_completion: true
+
+model_default_settings_ui:
+  chat:
+    tokenizer: fake
+  completion:
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/deepseek.yaml b/refact-agent/engine/src/yaml_configs/default_providers/deepseek.yaml
new file mode 100644
index 000000000..144699d03
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/deepseek.yaml
@@ -0,0 +1,15 @@
+chat_endpoint: https://api.deepseek.com/v1/chat/completions
+supports_completion: false
+
+api_key: sk-...
+
+running_models:
+  - deepseek-chat
+  - deepseek-reasoner
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 64000
+    supports_tools: true
+    supports_agent: true
+    tokenizer: hf://deepseek-ai/DeepSeek-...
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/google_gemini.yaml b/refact-agent/engine/src/yaml_configs/default_providers/google_gemini.yaml
new file mode 100644
index 000000000..b422e2780
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/google_gemini.yaml
@@ -0,0 +1,19 @@
+chat_endpoint:       https://generativelanguage.googleapis.com/v1beta/openai/chat/completions
+embedding_endpoint:  https://generativelanguage.googleapis.com/v1beta/openai/embeddings
+supports_completion: false
+
+api_key: AI...
+
+running_models:
+  - gemini-2.5-pro-exp-03-25
+  - gemini-2.5-pro-preview-03-25
+  - models/gemini-2.0-flash
+  - models/gemini-2.0-flash-lite
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_agent: true
+    tokenizer: hf://Xenova/gemma2-tokenizer
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/groq.yaml b/refact-agent/engine/src/yaml_configs/default_providers/groq.yaml
new file mode 100644
index 000000000..2c6702da2
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/groq.yaml
@@ -0,0 +1,14 @@
+chat_endpoint: https://api.groq.com/openai/v1/chat/completions
+supports_completion: false
+
+api_key: gsk_...
+
+running_models:
+  - qwen-qwq-32b
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 131072
+    supports_tools: true
+    supports_agent: true
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/lmstudio.yaml b/refact-agent/engine/src/yaml_configs/default_providers/lmstudio.yaml
new file mode 100644
index 000000000..ab704fe33
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/lmstudio.yaml
@@ -0,0 +1,11 @@
+completion_endpoint: http://localhost:1234/v1/completions
+chat_endpoint:       http://localhost:1234/v1/chat/completions
+supports_completion: true
+
+api_key: any-will-work
+
+model_default_settings_ui:
+  chat:
+    tokenizer: fake
+  completion:
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/ollama.yaml b/refact-agent/engine/src/yaml_configs/default_providers/ollama.yaml
new file mode 100644
index 000000000..055bdb3d6
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/ollama.yaml
@@ -0,0 +1,32 @@
+completion_endpoint: "http://localhost:11434/v1/completions"
+chat_endpoint:       "http://localhost:11434/v1/chat/completions"
+supports_completion: true
+
+api_key: any-will-work
+
+completion_models:
+  qwen2.5-coder:1.5b-base:
+    n_ctx: 2048
+    model_family: qwen2.5-coder-base
+    tokenizer: hf://Qwen/Qwen2.5-Coder-1.5B
+    scratchpad: FIM-PSM
+    scratchpad_patch:
+      fim_prefix: <|fim_prefix|>
+      fim_suffix: <|fim_suffix|>
+      fim_middle: <|fim_middle|>
+      eot: <|endoftext|>
+      extra_stop_tokens:
+      - <|repo_name|>
+      - <|file_sep|>
+      - <|fim_pad|>
+      - <|cursor|>
+      context_format: qwen2.5
+      rag_ratio: 0.5
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 2048
+    tokenizer: fake
+  completion:
+    n_ctx: 2048
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/openai.yaml b/refact-agent/engine/src/yaml_configs/default_providers/openai.yaml
new file mode 100644
index 000000000..b4e6aebeb
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/openai.yaml
@@ -0,0 +1,24 @@
+chat_endpoint:       "https://api.openai.com/v1/chat/completions"
+embedding_endpoint:  "https://api.openai.com/v1/embeddings"
+supports_completion: false
+
+api_key: "sk-..."
+
+running_models:
+  - gpt-4.1
+  - gpt-4o
+  - gpt-4.1-mini
+  - gpt-4o-mini
+  - gpt-4.1-nano
+  - gpt-4.5-preview
+  - o3-mini
+  - o1-mini
+  - o1
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 200000
+    supports_tools: true
+    supports_multimodality: true
+    supports_agent: true
+    tokenizer: hf://Xenova/gpt-4o
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/openrouter.yaml b/refact-agent/engine/src/yaml_configs/default_providers/openrouter.yaml
new file mode 100644
index 000000000..b2a1a4966
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/openrouter.yaml
@@ -0,0 +1,14 @@
+chat_endpoint:       https://openrouter.ai/api/v1/chat/completions
+embedding_endpoint:  https://openrouter.ai/api/v1/embeddings
+supports_completion: false
+
+api_key: "sk-or-..."
+
+running_models:
+  - anthropic/claude-3.7-sonnet
+  - openai/o3-mini
+  - openai/gpt-4.1
+
+model_default_settings_ui:
+  chat:
+    tokenizer: fake
\ No newline at end of file
diff --git a/refact-agent/engine/src/yaml_configs/default_providers/xai.yaml b/refact-agent/engine/src/yaml_configs/default_providers/xai.yaml
new file mode 100644
index 000000000..5fa040aca
--- /dev/null
+++ b/refact-agent/engine/src/yaml_configs/default_providers/xai.yaml
@@ -0,0 +1,15 @@
+chat_endpoint: https://api.x.ai/v1/chat/completions
+supports_completion: false
+
+api_key: xai-...
+
+running_models:
+  - grok-3-beta
+  - grok-3-mini-beta
+
+model_default_settings_ui:
+  chat:
+    n_ctx: 131072
+    supports_tools: true
+    supports_agent: true
+    tokenizer: hf://Xenova/grok-1-tokenizer
\ No newline at end of file
diff --git a/refact-agent/gui/README.md b/refact-agent/gui/README.md
index eaa45f1d3..e2fd57912 100644
--- a/refact-agent/gui/README.md
+++ b/refact-agent/gui/README.md
@@ -339,8 +339,8 @@ type ChatUserMessageResponse = {
 };
 
 type CapsResponse = {
-  code_chat_default_model: string;
-  code_chat_models: Record<string, CodeChatModel>;
+  chat_default_model: string;
+  chat_models: Record<string, ChatModel>;
 };
 
 type CodeCompletionModel = {
diff --git a/refact-agent/gui/src/__fixtures__/caps.ts b/refact-agent/gui/src/__fixtures__/caps.ts
index 75a353265..24d706ec4 100644
--- a/refact-agent/gui/src/__fixtures__/caps.ts
+++ b/refact-agent/gui/src/__fixtures__/caps.ts
@@ -3,13 +3,7 @@ import { CapsResponse } from "../services/refact";
 export const STUB_CAPS_RESPONSE: CapsResponse = {
   cloud_name: "Refact",
   endpoint_style: "openai",
-
-  endpoint_template: "https://inference.smallcloud.ai/v1/completions",
-
-  endpoint_chat_passthrough:
-    "https://inference.smallcloud.ai/v1/chat/completions",
-  tokenizer_path_template:
-    "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
+  code_completion_n_ctx: 4000,
   tokenizer_rewrite_path: {
     "o1-mini": "Xenova/gpt-4o",
     "gpt-4-turbo-2024-04-09": "Xenova/gpt-4",
@@ -39,292 +33,424 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
     "starcoder2/3b": "bigcode/starcoder2-3b",
   },
   telemetry_basic_dest: "https://www.smallcloud.ai/v1/telemetry-basic",
-
-  code_completion_models: {
-    "Refact/1.6B": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-SPM": {},
-      },
-      default_scratchpad: "FIM-SPM",
-      similar_models: ["Refact/1.6B", "Refact/1.6B/vllm"],
-      supports_tools: false,
-      supports_multimodality: false,
+  telemetry_basic_retrieve_my_own:
+    "https://staging.smallcloud.ai/v1/telemetry-retrieve-my-own-stats",
+  tokenizer_path_template:
+    "https://huggingface.co/$MODEL/resolve/main/tokenizer.json",
+  endpoint_chat_passthrough:
+    "https://inference.smallcloud.ai/v1/chat/completions",
+  endpoint_template: "https://inference.smallcloud.ai/v1/completions",
+  completion_models: {
+    "Refact/smallcloudai/Refact-1_6B-fim": {
+      n_ctx: 4000,
+      name: "smallcloudai/Refact-1_6B-fim",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/Refact/1.6B": {
+      n_ctx: 4000,
+      name: "Refact/1.6B",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/starcoder2/3b": {
+      n_ctx: 4000,
+      name: "starcoder2/3b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/qwen2.5/coder/1.5b/base": {
+      n_ctx: 4000,
+      name: "qwen2.5/coder/1.5b/base",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gpt-4o": {
+      n_ctx: 4000,
+      name: "gpt-4o",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gpt-4o-mini": {
+      n_ctx: 4000,
+      name: "gpt-4o-mini",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/claude-3-5-sonnet": {
+      n_ctx: 4000,
+      name: "claude-3-5-sonnet",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/claude-3-5-haiku": {
+      n_ctx: 4000,
+      name: "claude-3-5-haiku",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/claude-3-7-sonnet": {
+      n_ctx: 4000,
+      name: "claude-3-7-sonnet",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/groq-llama-3.1-8b": {
+      n_ctx: 4000,
+      name: "groq-llama-3.1-8b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/groq-llama-3.1-70b": {
+      n_ctx: 4000,
+      name: "groq-llama-3.1-70b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-2.0-flash-exp": {
+      n_ctx: 4000,
+      name: "gemini-2.0-flash-exp",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-1.5-flash": {
+      n_ctx: 4000,
+      name: "gemini-1.5-flash",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-1.5-flash-8b": {
+      n_ctx: 4000,
+      name: "gemini-1.5-flash-8b",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/gemini-1.5-pro": {
+      n_ctx: 4000,
+      name: "gemini-1.5-pro",
+      enabled: true,
+      type: "completion",
+      model_family: null,
+    },
+    "Refact/gemini-2.0-exp-advanced": {
+      n_ctx: 4000,
+      name: "gemini-2.0-exp-advanced",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/grok-2": {
+      n_ctx: 4000,
+      name: "grok-2",
+      enabled: true,
+      model_family: null,
+      type: "completion",
+    },
+    "Refact/deepseek-chat": {
+      n_ctx: 4000,
+      name: "deepseek-chat",
+      type: "completion",
+      enabled: true,
+      model_family: null,
+    },
+  },
+  chat_models: {
+    "Refact/gpt-4o": {
+      n_ctx: 128000,
+      name: "gpt-4o",
+      id: "Refact/gpt-4o",
+      type: "chat",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "groq-llama-3.1-8b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/gpt-4o-mini": {
+      n_ctx: 128000,
+      name: "gpt-4o-mini",
+      id: "Refact/gpt-4o-mini",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
-      supports_multimodality: false,
+      type: "chat",
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "qwen2.5/coder/1.5b/base": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-PSM": {
-          fim_prefix: "<|fim_prefix|>",
-          fim_suffix: "<|fim_suffix|>",
-          fim_middle: "<|fim_middle|>",
-          eot: "<|endoftext|>",
-          extra_stop_tokens: ["<|repo_name|>", "<|file_sep|>", "<|fim_pad|>"],
-          context_format: "qwen2.5",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "FIM-PSM",
-      similar_models: [
-        "qwen2.5/coder/1.5b/base",
-        "qwen2.5/coder/3b/base",
-        "qwen2.5/coder/7b/base",
-        "qwen2.5/coder/14b/base",
-        "qwen2.5/coder/32b/base",
-        "qwen2.5/coder/0.5b/base/vllm",
-        "qwen2.5/coder/1.5b/base/vllm",
-        "qwen2.5/coder/3b/base/vllm",
-        "qwen2.5/coder/7b/base/vllm",
-        "qwen2.5/coder/14b/base/vllm",
-        "qwen2.5/coder/32b/base/vllm",
-      ],
-      supports_tools: false,
-      supports_multimodality: false,
+    "Refact/o1": {
+      n_ctx: 200000,
+      name: "o1",
+      id: "Refact/o1",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
+      type: "chat",
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: "openai",
+      supports_boost_reasoning: true,
+      default_temperature: null,
     },
-    "gpt-4o": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-4o-2024-05-13",
-        "gpt-4o-2024-08-06",
-        "openai/gpt-4o",
-      ],
+    "Refact/o1-mini": {
+      n_ctx: 128000,
+      name: "o1-mini",
+      id: "Refact/o1-mini",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      type: "chat",
+      supports_agent: false,
+      supports_reasoning: "openai",
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4o-mini": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: ["gpt-4o-mini-2024-07-18"],
+    "Refact/o3-mini": {
+      n_ctx: 200000,
+      name: "o3-mini",
+      id: "Refact/o3-mini",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
+      type: "chat",
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: "openai",
+      supports_boost_reasoning: true,
+      default_temperature: null,
     },
-    "smallcloudai/Refact-1_6B-fim": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-SPM": {},
-      },
-      default_scratchpad: "FIM-SPM",
-      similar_models: ["Refact/1.6B", "Refact/1.6B/vllm"],
-      supports_tools: false,
-      supports_multimodality: false,
+    "Refact/claude-3-5-sonnet": {
+      n_ctx: 200000,
+      name: "claude-3-5-sonnet",
+      id: "Refact/claude-3-5-sonnet",
+      enabled: true,
+      type: "chat",
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "groq-llama-3.1-70b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        REPLACE_PASSTHROUGH: {
-          context_format: "chat",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/claude-3-5-haiku": {
+      type: "chat",
+      n_ctx: 200000,
+      name: "claude-3-5-haiku",
+      id: "Refact/claude-3-5-haiku",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "starcoder2/3b": {
-      n_ctx: 4096,
-      supports_scratchpads: {
-        "FIM-PSM": {
-          context_format: "starcoder",
-          rag_ratio: 0.5,
-        },
-      },
-      default_scratchpad: "FIM-PSM",
-      similar_models: [
-        "bigcode/starcoderbase",
-        "starcoder/15b/base",
-        "starcoder/15b/plus",
-        "starcoder/1b/base",
-        "starcoder/3b/base",
-        "starcoder/7b/base",
-        "wizardcoder/15b",
-        "starcoder/1b/vllm",
-        "starcoder/3b/vllm",
-        "starcoder/7b/vllm",
-        "starcoder2/3b/base",
-        "starcoder2/7b/base",
-        "starcoder2/15b/base",
-        "starcoder2/3b/vllm",
-        "starcoder2/7b/vllm",
-        "starcoder2/15b/vllm",
-        "starcoder2/3b/neuron",
-        "starcoder2/7b/neuron",
-        "starcoder2/15b/neuron",
-        "starcoder2/3b",
-        "starcoder2/7b",
-        "starcoder2/15b",
-        "bigcode/starcoder2-3b",
-        "bigcode/starcoder2-7b",
-        "bigcode/starcoder2-15b",
-      ],
-      supports_tools: false,
+    "Refact/claude-3-7-sonnet": {
+      type: "chat",
+      n_ctx: 200000,
+      name: "claude-3-7-sonnet",
+      id: "Refact/claude-3-7-sonnet",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
+      supports_clicks: true,
+      supports_agent: true,
+      supports_reasoning: "anthropic",
+      supports_boost_reasoning: true,
+      default_temperature: null,
+    },
+    "Refact/groq-llama-3.1-8b": {
+      type: "chat",
+      n_ctx: 128000,
+      name: "groq-llama-3.1-8b",
+      id: "Refact/groq-llama-3.1-8b",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-  },
-  code_completion_default_model: "qwen2.5/coder/1.5b/base",
-  code_completion_n_ctx: 4000,
-  code_chat_models: {
-    "groq-llama-3.1-70b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/groq-llama-3.1-70b": {
+      type: "chat",
+      n_ctx: 128000,
+      name: "groq-llama-3.1-70b",
+      id: "Refact/groq-llama-3.1-70b",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-3.5-turbo": {
-      n_ctx: 16000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-3.5-turbo-1106",
-        "gpt-3.5-turbo-0125",
-        "gpt-4",
-        "gpt-4-turbo",
-        "gpt-4-turbo-2024-04-09",
-        "openai/gpt-3.5-turbo",
-        "openai/gpt-4",
-        "openai/gpt-4-turbo",
-      ],
+    "Refact/gemini-2.0-flash-exp": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-2.0-flash-exp",
+      id: "Refact/gemini-2.0-flash-exp",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
-      supports_multimodality: false,
+      supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4o": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-4o-2024-05-13",
-        "gpt-4o-2024-08-06",
-        "openai/gpt-4o",
-      ],
+    "Refact/gemini-1.5-flash": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-1.5-flash",
+      id: "Refact/gemini-1.5-flash",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: true,
       supports_clicks: false,
-      supports_agent: true,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
+    },
+    "Refact/gemini-1.5-flash-8b": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-1.5-flash-8b",
+      id: "Refact/gemini-1.5-flash-8b",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: true,
+      supports_multimodality: true,
+      supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4o-mini": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: ["gpt-4o-mini-2024-07-18"],
+    "Refact/gemini-1.5-pro": {
+      type: "chat",
+      n_ctx: 2000000,
+      name: "gemini-1.5-pro",
+      id: "Refact/gemini-1.5-pro",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: true,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "claude-3-5-sonnet": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: ["claude-3-5-sonnet-20240620"],
+    "Refact/gemini-2.0-exp-advanced": {
+      type: "chat",
+      n_ctx: 1000000,
+      name: "gemini-2.0-exp-advanced",
+      id: "Refact/gemini-2.0-exp-advanced",
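+        // An Error instance means the stream broke locally, so the error stays
+        // in this chat; a structured detail payload from the server is routed
+        // as a global error (see errorType below).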
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: true,
       supports_clicks: false,
       supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "groq-llama-3.1-8b": {
-      n_ctx: 32000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "groq-llama-3.1-70b",
-        "groq-llama-3.2-1b",
-        "groq-llama-3.2-3b",
-        "groq-llama-3.2-11b-vision",
-        "groq-llama-3.2-90b-vision",
-      ],
+    "Refact/grok-2": {
+      type: "chat",
+      n_ctx: 128000,
+      name: "grok-2",
+      id: "Refact/grok-2",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
-    "gpt-4-turbo": {
-      n_ctx: 16000,
-      supports_scratchpads: {
-        PASSTHROUGH: {},
-      },
-      default_scratchpad: "",
-      similar_models: [
-        "gpt-3.5-turbo-1106",
-        "gpt-3.5-turbo-0125",
-        "gpt-4",
-        "gpt-4-turbo",
-        "gpt-4-turbo-2024-04-09",
-        "openai/gpt-3.5-turbo",
-        "openai/gpt-4",
-        "openai/gpt-4-turbo",
-      ],
+    "Refact/deepseek-chat": {
+      type: "chat",
+      n_ctx: 64000,
+      name: "deepseek-chat",
+      id: "Refact/deepseek-chat",
+      enabled: true,
+      tokenizer: "fake",
       supports_tools: true,
       supports_multimodality: false,
       supports_clicks: false,
+      supports_agent: true,
+      supports_reasoning: null,
+      supports_boost_reasoning: false,
+      default_temperature: null,
     },
+    "Refact/deepseek-reasoner": {
+      type: "chat",
+      n_ctx: 64000,
+      name: "deepseek-reasoner",
+      id: "Refact/deepseek-reasoner",
+      enabled: true,
+      tokenizer: "fake",
+      supports_tools: false,
+      supports_multimodality: false,
+      supports_clicks: false,
+      supports_agent: false,
+      supports_reasoning: "deepseek",
+      supports_boost_reasoning: false,
+      default_temperature: 0.6,
+    },
+  },
+  embedding_model: {
+    type: "embedding",
+    n_ctx: 512,
+    enabled: true,
+    tokenizer: "fake",
+    embedding_size: 1536,
+    name: "thenlper/gte-base",
+    id: "Refact/thenlper/gte-base",
+    rejection_threshold: 0.25,
+    embedding_batch: 64,
   },
-  code_chat_default_model: "gpt-4o-mini",
   running_models: [
     "smallcloudai/Refact-1_6B-fim",
     "Refact/1.6B",
@@ -339,20 +465,25 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
     "groq-llama-3.1-8b",
     "groq-llama-3.1-70b",
   ],
+  completion_default_model: "Refact/1.6B",
+  chat_default_model: "gpt-4o",
+  chat_thinking_model: "",
+  chat_light_model: "",
+  caps_version: 0,
   code_chat_default_system_prompt: "default",
   support_metadata: true,
-  caps_version: 0,
+  customization: "",
 };
 
 export const EMPTY_CAPS_RESPONSE: CapsResponse = {
   support_metadata: false,
   caps_version: 0,
   cloud_name: "",
-  code_chat_default_model: "",
+  chat_default_model: "",
   code_chat_default_system_prompt: "",
-  code_chat_models: {},
-  code_completion_default_model: "",
-  code_completion_models: {},
+  chat_models: {},
+  completion_default_model: "",
+  completion_models: {},
   code_completion_n_ctx: 0,
   endpoint_chat_passthrough: "",
   endpoint_style: "",
@@ -360,5 +491,9 @@ export const EMPTY_CAPS_RESPONSE: CapsResponse = {
   running_models: [],
   telemetry_basic_dest: "",
   tokenizer_path_template: "",
+  customization: "",
   tokenizer_rewrite_path: {},
+  chat_light_model: "",
+  chat_thinking_model: "",
+  telemetry_basic_retrieve_my_own: "",
 };
diff --git a/refact-agent/gui/src/__fixtures__/context_files.ts b/refact-agent/gui/src/__fixtures__/context_files.ts
index 198353e45..67b5f68a7 100644
--- a/refact-agent/gui/src/__fixtures__/context_files.ts
+++ b/refact-agent/gui/src/__fixtures__/context_files.ts
@@ -5,8 +5,8 @@ const some_text = `import { CapsResponse } from "../services/refact";
 export const STUB_CAPS_RESPONSE: CapsResponse = {
   caps_version: 0,
   cloud_name: "Refact",
-  code_chat_default_model: "gpt-3.5-turbo",
-  code_chat_models: {
+  chat_default_model: "gpt-3.5-turbo",
+  chat_models: {
     "gpt-3.5-turbo": {
       default_scratchpad: "",
       n_ctx: 4096,
@@ -30,8 +30,8 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
       },
     },
   },
-  code_completion_default_model: "smallcloudai/Refact-1_6B-fim",
-  code_completion_models: {
+  completion_default_model: "smallcloudai/Refact-1_6B-fim",
+  completion_models: {
     "smallcloudai/Refact-1_6B-fim": {
       default_scratchpad: "FIM-SPM",
       n_ctx: 4096,
diff --git a/refact-agent/gui/src/app/middleware.ts b/refact-agent/gui/src/app/middleware.ts
index f00f7c3ef..2226c8727 100644
--- a/refact-agent/gui/src/app/middleware.ts
+++ b/refact-agent/gui/src/app/middleware.ts
@@ -14,6 +14,7 @@ import {
   upsertToolCall,
   sendCurrentChatToLspAfterToolCallUpdate,
   chatResponse,
+  chatError,
 } from "../features/Chat/Thread";
 import { statisticsApi } from "../services/refact/statistics";
 import { integrationsApi } from "../services/refact/integrations";
@@ -21,7 +22,11 @@ import { dockerApi } from "../services/refact/docker";
 import { capsApi, isCapsErrorResponse } from "../services/refact/caps";
 import { promptsApi } from "../services/refact/prompts";
 import { toolsApi } from "../services/refact/tools";
-import { commandsApi, isDetailMessage } from "../services/refact/commands";
+import {
+  commandsApi,
+  isDetailMessage,
+  isDetailMessageWithErrorType,
+} from "../services/refact/commands";
 import { pathApi } from "../services/refact/path";
 import { pingApi } from "../services/refact/ping";
 import {
@@ -44,7 +49,7 @@ import {
   ideForceReloadProjectTreeFiles,
 } from "../hooks/useEventBusForIDE";
 import { upsertToolCallIntoHistory } from "../features/History/historySlice";
-import { isToolResponse } from "../events";
+import { isToolResponse, modelsApi, providersApi } from "../services/refact";
 
 const AUTH_ERROR_MESSAGE =
   "There is an issue with your API key. Check out your API Key or re-login";
@@ -298,6 +303,40 @@ startListening({
     ) {
       listenerApi.dispatch(setError(action.payload));
     }
+
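+    // Provider configuration failures are surfaced globally; a 401 also sets
+    // the auth-error flag so the API-key problem is called out explicitly.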
+    if (
+      (providersApi.endpoints.updateProvider.matchRejected(action) ||
+        providersApi.endpoints.getProvider.matchRejected(action) ||
+        providersApi.endpoints.getProviderTemplates.matchRejected(action) ||
+        providersApi.endpoints.getConfiguredProviders.matchRejected(action)) &&
+      !action.meta.condition
+    ) {
+      const errorStatus = action.payload?.status;
+      const isAuthError = errorStatus === 401;
+      const message = isAuthError
+        ? AUTH_ERROR_MESSAGE
+        : isDetailMessage(action.payload?.data)
+          ? action.payload.data.detail
+          : `provider request error.`;
+
+      listenerApi.dispatch(setError(message));
+      listenerApi.dispatch(setIsAuthError(isAuthError));
+    }
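+    // getModels rejections get the same treatment as the provider endpoints.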
+    if (
+      modelsApi.endpoints.getModels.matchRejected(action) &&
+      !action.meta.condition
+    ) {
+      const errorStatus = action.payload?.status;
+      const isAuthError = errorStatus === 401;
+      const message = isAuthError
+        ? AUTH_ERROR_MESSAGE
+        : isDetailMessage(action.payload?.data)
+          ? action.payload.data.detail
+          : `models fetch error.`;
+
+      listenerApi.dispatch(setError(message));
+      listenerApi.dispatch(setIsAuthError(isAuthError));
+    }
   },
 });
 
@@ -375,8 +414,6 @@ startListening({
     pathApi.endpoints.customizationPath.matchRejected,
     pathApi.endpoints.privacyPath.matchFulfilled,
     pathApi.endpoints.privacyPath.matchRejected,
-    pathApi.endpoints.bringYourOwnKeyPath.matchFulfilled,
-    pathApi.endpoints.bringYourOwnKeyPath.matchRejected,
     pathApi.endpoints.integrationsPath.matchFulfilled,
     pathApi.endpoints.integrationsPath.matchRejected,
   ),
@@ -390,10 +427,13 @@ startListening({
           : state.chat.thread;
       const scope = `sendChat_${thread.model}_${mode}`;
 
-      const errorMessage = isDetailMessage(action.payload)
-        ? action.payload.detail
-        : null;
-      if (errorMessage) {
+      if (isDetailMessageWithErrorType(action.payload)) {
+        const errorMessage = action.payload.detail;
+        listenerApi.dispatch(
+          action.payload.errorType === "GLOBAL"
+            ? setError(errorMessage)
+            : chatError({ id: chatId, message: errorMessage }),
+        );
         const thunk = telemetryApi.endpoints.sendTelemetryChatEvent.initiate({
           scope,
           success: false,
@@ -446,7 +486,6 @@ startListening({
     if (
       pathApi.endpoints.customizationPath.matchFulfilled(action) ||
       pathApi.endpoints.privacyPath.matchFulfilled(action) ||
-      pathApi.endpoints.bringYourOwnKeyPath.matchFulfilled(action) ||
       pathApi.endpoints.integrationsPath.matchFulfilled(action)
     ) {
       const thunk = telemetryApi.endpoints.sendTelemetryNetEvent.initiate({
@@ -461,7 +500,6 @@ startListening({
     if (
       (pathApi.endpoints.customizationPath.matchRejected(action) ||
         pathApi.endpoints.privacyPath.matchRejected(action) ||
-        pathApi.endpoints.bringYourOwnKeyPath.matchRejected(action) ||
         pathApi.endpoints.integrationsPath.matchRejected(action)) &&
       !action.meta.condition
     ) {
diff --git a/refact-agent/gui/src/app/store.ts b/refact-agent/gui/src/app/store.ts
index 49b48ee80..a227989cc 100644
--- a/refact-agent/gui/src/app/store.ts
+++ b/refact-agent/gui/src/app/store.ts
@@ -22,6 +22,8 @@ import {
   dockerApi,
   telemetryApi,
   knowledgeApi,
+  providersApi,
+  modelsApi,
 } from "../services/refact";
 import { smallCloudApi } from "../services/smallcloud";
 import { reducer as fimReducer } from "../features/FIM/reducer";
@@ -100,6 +102,8 @@ const rootReducer = combineSlices(
     [checkpointsApi.reducerPath]: checkpointsApi.reducer,
     [telemetryApi.reducerPath]: telemetryApi.reducer,
     [knowledgeApi.reducerPath]: knowledgeApi.reducer,
+    [providersApi.reducerPath]: providersApi.reducer,
+    [modelsApi.reducerPath]: modelsApi.reducer,
   },
   historySlice,
   errorSlice,
@@ -186,6 +190,8 @@ export function setUpStore(preloadedState?: Partial) {
             checkpointsApi.middleware,
             telemetryApi.middleware,
             knowledgeApi.middleware,
+            providersApi.middleware,
+            modelsApi.middleware,
           )
           .prepend(historyMiddleware.middleware)
           // .prepend(errorMiddleware.middleware)
diff --git a/refact-agent/gui/src/components/Chat/Chat.tsx b/refact-agent/gui/src/components/Chat/Chat.tsx
index a28b815d7..b2bff2b49 100644
--- a/refact-agent/gui/src/components/Chat/Chat.tsx
+++ b/refact-agent/gui/src/components/Chat/Chat.tsx
@@ -135,8 +135,7 @@ export const Chat: React.FC = ({
               
                 
                   model:{" "}
-                  {capsForToolUse.currentModel ||
-                    caps.data?.code_chat_default_model}{" "}
+                  {capsForToolUse.currentModel || caps.data?.chat_default_model}{" "}
                 {" "}
                 •{" "}
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationDeletePopover.tsx b/refact-agent/gui/src/components/DeletePopover/DeletePopover.tsx
rename from refact-agent/gui/src/components/IntegrationsView/IntegrationDeletePopover.tsx
rename to refact-agent/gui/src/components/DeletePopover/DeletePopover.tsx
-export type IntegrationDeletePopoverProps = {
-  isApplying: boolean;
-  isDeletingIntegration: boolean;
-  integrationName: string;
-  integrationConfigPath: string;
-  handleDeleteIntegration: (path: string, name: string) => void;
-};
+export type DeletePopoverProps = {
+  isDisabled: boolean;
+  isDeleting: boolean;
+  itemName: string;
+  deleteBy: string;
+  handleDelete: (deleteBy: string) => void;
 };
 
-export const IntegrationDeletePopover: FC = ({
-  isApplying,
-  isDeletingIntegration,
-  integrationName,
-  integrationConfigPath,
-  handleDeleteIntegration,
+export const DeletePopover: FC = ({
+  deleteBy,
+  itemName,
+  handleDelete,
+  isDeleting,
+  isDisabled,
 }) => {
   return (
     
@@ -35,18 +35,12 @@ export const IntegrationDeletePopover: FC = ({
           variant="outline"
           type="button"
           size="2"
-          title={"Delete configuration data of this integration"}
-          className={classNames(
-            {
-              [styles.disabledButton]: isDeletingIntegration || isApplying,
-            },
-            // styles.button,
-          )}
-          disabled={isDeletingIntegration || isApplying}
+          title={"Delete configuration data"}
+          className={classNames({
+            [styles.disabledButton]: isDeleting || isDisabled,
+          })}
+          disabled={isDeleting || isDisabled}
         >
-          {/* {isDeletingIntegration
-            ? "Deleting configuration..."
-            : "Delete configuration"} */}
           
         
       
@@ -59,7 +53,7 @@ export const IntegrationDeletePopover: FC = ({
                   Destructive action
                 
                 
-                  Do you really want to delete {integrationName}
+                  Do you really want to delete {itemName}
                   's configuration data?
                 
               
@@ -70,12 +64,7 @@ export const IntegrationDeletePopover: FC = ({
                     size="2"
                     variant="solid"
                     color="red"
-                    onClick={() =>
-                      handleDeleteIntegration(
-                        integrationConfigPath,
-                        integrationName,
-                      )
-                    }
+                    onClick={() => handleDelete(deleteBy)}
                   >
                     Delete
                   
diff --git a/refact-agent/gui/src/components/DeletePopover/index.ts b/refact-agent/gui/src/components/DeletePopover/index.ts
new file mode 100644
index 000000000..23144d8f9
--- /dev/null
+++ b/refact-agent/gui/src/components/DeletePopover/index.ts
@@ -0,0 +1 @@
+export { DeletePopover } from "./DeletePopover";
diff --git a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css
index 78c7c7659..8dc29dadb 100644
--- a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css
+++ b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.module.css
@@ -27,11 +27,3 @@
   max-width: 30px;
   object-fit: cover;
 }
-
-.availabilitySwitch {
-  cursor: pointer;
-}
-
-.disabledAvailabilitySwitch {
-  cursor: not-allowed;
-}
diff --git a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx
index e43d18b34..c03c03897 100644
--- a/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/DisplayIntegrations/IntegrationCard.tsx
@@ -1,7 +1,7 @@
-import { FC, MouseEventHandler } from "react";
+import type { FC, MouseEventHandler } from "react";
 import classNames from "classnames";
 
-import { Badge, Card, Flex, Text } from "@radix-ui/themes";
+import { Card, Flex, Text } from "@radix-ui/themes";
 import { useAppSelector } from "../../../hooks";
 import { useUpdateIntegration } from "./useUpdateIntegration";
 
@@ -15,6 +15,7 @@ import { formatIntegrationIconPath } from "../../../utils/formatIntegrationIconP
 import { getIntegrationInfo } from "../../../utils/getIntegrationInfo";
 
 import styles from "./IntegrationCard.module.css";
+import { OnOffSwitch } from "../../OnOffSwitch/OnOffSwitch";
 
 type IntegrationCardProps = {
   integration:
@@ -54,11 +55,6 @@ export const IntegrationCard: FC = ({
     void updateIntegrationAvailability();
   };
 
-  const switches = [
-    { label: "On", leftRadius: true },
-    { label: "Off", rightRadius: true },
-  ];
-
   return (
      = ({
             {displayName}
           
           {!isNotConfigured && (
-            
-              {switches.map(({ label, leftRadius }) => {
-                const isOn = label === "On";
-                const isActive =
-                  isOn === integrationAvailability.on_your_laptop;
-
-                return (
-                  
-                    {label}
-                  
-                );
-              })}
-            
+            
           )}
         
       
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx
index d5f3b474c..909355bc2 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/ErrorState.tsx
@@ -1,14 +1,14 @@
 import { ExclamationTriangleIcon } from "@radix-ui/react-icons";
 import { Badge, Button, Flex, Text } from "@radix-ui/themes";
 import { FC } from "react";
-import { IntegrationDeletePopover } from "../IntegrationDeletePopover";
 import { Integration } from "../../../services/refact";
 import { useAppSelector, useEventsBusForIDE } from "../../../hooks";
 import { selectConfig } from "../../../features/Config/configSlice";
+import { DeletePopover } from "../../DeletePopover";
 
 type ErrorStateProps = {
   integration: Integration;
-  onDelete: (path: string, name: string) => void;
+  onDelete: (path: string) => void;
   isApplying: boolean;
   isDeletingIntegration: boolean;
 };
@@ -51,12 +51,12 @@ export const ErrorState: FC = ({
             Open {integr_name}.yaml
           
         )}
-        
       
     
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx
index 6ad1770a0..065fcc092 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/FormAvailabilityAndDelete.tsx
@@ -4,7 +4,7 @@ import styles from "./IntegrationForm.module.css";
 import { FC } from "react";
 import { Integration } from "../../../services/refact";
 import { IntegrationAvailability } from "./IntegrationAvailability";
-import { IntegrationDeletePopover } from "../IntegrationDeletePopover";
+import { DeletePopover } from "../../DeletePopover";
 
 type FormAvailabilityAndDeleteProps = {
   integration: Integration;
@@ -12,7 +12,7 @@ type FormAvailabilityAndDeleteProps = {
   isApplying: boolean;
   isDeletingIntegration: boolean;
   handleAvailabilityChange: (fieldName: string, value: boolean) => void;
-  onDelete: (path: string, name: string) => void;
+  onDelete: (path: string) => void;
 };
 
 export const FormAvailabilityAndDelete: FC = ({
@@ -44,12 +44,12 @@ export const FormAvailabilityAndDelete: FC = ({
             />
           ))}
       
-      
     
   );
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx
index e64f81a60..9dd353a05 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationForm/IntegrationForm.tsx
@@ -36,7 +36,7 @@ type IntegrationFormProps = {
   MCPArguments: string[];
   MCPEnvironmentVariables: Record;
   handleSubmit: (event: FormEvent) => void;
-  handleDeleteIntegration: (path: string, name: string) => void;
+  handleDeleteIntegration: (path: string) => void;
   handleChange: (event: FormEvent) => void;
   onSchema: (schema: Integration["integr_schema"]) => void;
   onValues: (values: Integration["integr_values"]) => void;
diff --git a/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx b/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx
index c78aaf673..58d3a75c6 100644
--- a/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx
+++ b/refact-agent/gui/src/components/IntegrationsView/IntegrationsView.tsx
@@ -110,9 +110,7 @@ export const IntegrationsView: FC = ({
       
          void handleSubmit(event)}
-          handleDeleteIntegration={(path, name) =>
-            void handleDeleteIntegration(path, name)
-          }
+          handleDeleteIntegration={(path) => void handleDeleteIntegration(path)}
           integrationPath={currentIntegration.integr_config_path}
           isApplying={isApplyingIntegrationForm}
           isDeletingIntegration={isDeletingIntegration}
diff --git a/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts b/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts
index db25974f9..aaabbea02 100644
--- a/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts
+++ b/refact-agent/gui/src/components/IntegrationsView/hooks/useIntegrations.ts
@@ -620,8 +620,8 @@ export const useIntegrations = ({
   );
 
   const handleDeleteIntegration = useCallback(
-    async (configurationPath: string, integrationName: string) => {
-      // if (!currentIntegration) return;
+    async (configurationPath: string) => {
+      if (!currentIntegration) return;
       setIsDeletingIntegration(true);
       const response = await deleteIntegrationTrigger(configurationPath);
       debugIntegrations("[DEBUG]: response: ", response);
@@ -632,7 +632,7 @@ export const useIntegrations = ({
       dispatch(
         setInformation(
           `${toPascalCase(
-            integrationName,
+            currentIntegration.integr_name,
           )} integration's configuration was deleted successfully!`,
         ),
       );
@@ -642,7 +642,7 @@ export const useIntegrations = ({
         clearTimeout(timeoutId);
       }, 1200);
     },
-    [dispatch, deleteIntegrationTrigger, handleFormReturn],
+    [currentIntegration, dispatch, deleteIntegrationTrigger, handleFormReturn],
   );
 
   const handleIntegrationFormChange = useCallback(
diff --git a/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.module.css b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.module.css
new file mode 100644
index 000000000..9bd21d870
--- /dev/null
+++ b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.module.css
@@ -0,0 +1,12 @@
+.switch {
+  cursor: pointer;
+}
+
+.disabled {
+  cursor: not-allowed;
+}
+
+.unavailable {
+  cursor: not-allowed;
+  opacity: 0.35;
+}
diff --git a/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.tsx b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.tsx
new file mode 100644
index 000000000..359ef6095
--- /dev/null
+++ b/refact-agent/gui/src/components/OnOffSwitch/OnOffSwitch.tsx
@@ -0,0 +1,61 @@
+import classNames from "classnames";
+import { Badge, Flex } from "@radix-ui/themes";
+
+import styles from "./OnOffSwitch.module.css";
+import React, { MouseEventHandler } from "react";
+
+const switches = [
+  { label: "On", leftRadius: true },
+  { label: "Off", rightRadius: true },
+];
+
+export type OnOffSwitchProps = {
+  isEnabled: boolean;
+  isUnavailable?: boolean;
+  isUpdating?: boolean;
+  handleClick: MouseEventHandler;
+};
+
+export const OnOffSwitch: React.FC = ({
+  isEnabled,
+  isUnavailable = false,
+  isUpdating = false,
+  handleClick,
+}) => {
+  return (
+    
+      {switches.map(({ label, leftRadius }) => {
+        const isOn = label === "On";
+        const isActive = isOn === isEnabled;
+
+        return (
+          
+            {label}
+          
+        );
+      })}
+    
+  );
+};
diff --git a/refact-agent/gui/src/components/OnOffSwitch/index.ts b/refact-agent/gui/src/components/OnOffSwitch/index.ts
new file mode 100644
index 000000000..341a3aba9
--- /dev/null
+++ b/refact-agent/gui/src/components/OnOffSwitch/index.ts
@@ -0,0 +1 @@
+export { OnOffSwitch } from "./OnOffSwitch";
diff --git a/refact-agent/gui/src/components/Toolbar/Dropdown.tsx b/refact-agent/gui/src/components/Toolbar/Dropdown.tsx
index c3702f802..09ce17d43 100644
--- a/refact-agent/gui/src/components/Toolbar/Dropdown.tsx
+++ b/refact-agent/gui/src/components/Toolbar/Dropdown.tsx
@@ -6,7 +6,6 @@ import {
 } from "../../features/Config/configSlice";
 import { useTourRefs } from "../../features/Tour";
 import {
-  useConfig,
   useEventsBusForIDE,
   useGetUser,
   useLogout,
@@ -27,6 +26,7 @@ import {
   HamburgerMenuIcon,
   DiscordLogoIcon,
   QuestionMarkCircledIcon,
+  GearIcon,
 } from "@radix-ui/react-icons";
 import { clearHistory } from "../../features/History/historySlice";
 import { KnowledgeListPage } from "../../features/Pages/pagesSlice";
@@ -41,6 +41,7 @@ export type DropdownNavigationOptions =
   | "restart tour"
   | "login page"
   | "integrations"
+  | "providers"
   | KnowledgeListPage["name"]
   | "";
 
@@ -79,7 +80,6 @@ export const Dropdown: React.FC = ({
   const dispatch = useAppDispatch();
   const { maxAgentUsageAmount, currentAgentUsage } = useAgentUsage();
   const logout = useLogout();
-  const { addressURL } = useConfig();
   const knowledgeEnabled = useAppSelector(selectKnowledgeFeature);
   const { startPollingForUser } = useAgentUsage();
 
@@ -87,8 +87,7 @@ export const Dropdown: React.FC = ({
   const discordUrl = "https://www.smallcloud.ai/discord";
   const accountLink = linkForAccount(host);
   const openUrl = useOpenUrl();
-  const { openBringYourOwnKeyFile, openCustomizationFile, openPrivacyFile } =
-    useEventsBusForIDE();
+  const { openCustomizationFile, openPrivacyFile } = useEventsBusForIDE();
 
   const handleChatHistoryCleanUp = () => {
     dispatch(clearHistory());
@@ -188,6 +187,10 @@ export const Dropdown: React.FC = ({
            Set up Agent Integrations
         
 
+         handleNavigation("providers")}>
+           Configure Providers
+        
+
         {knowledgeEnabled && (
            handleNavigation("knowledge list")}
@@ -220,16 +223,6 @@ export const Dropdown: React.FC = ({
           Edit privacy.yaml
         
 
-        {addressURL?.endsWith(".yaml") && (
-           {
-              void openBringYourOwnKeyFile();
-            }}
-          >
-            Edit Bring Your Own Key
-          
-        )}
-
         
 
          handleNavigation("restart tour")}>
diff --git a/refact-agent/gui/src/components/Toolbar/Toolbar.tsx b/refact-agent/gui/src/components/Toolbar/Toolbar.tsx
index df7b1a81b..c8ad01c86 100644
--- a/refact-agent/gui/src/components/Toolbar/Toolbar.tsx
+++ b/refact-agent/gui/src/components/Toolbar/Toolbar.tsx
@@ -141,6 +141,13 @@ export const Toolbar = ({ activeTab }: ToolbarProps) => {
           success: true,
           error_message: "",
         });
+      } else if (to === "providers") {
+        dispatch(push({ name: "providers page" }));
+        void sendTelemetryEvent({
+          scope: `openProviders`,
+          success: true,
+          error_message: "",
+        });
       } else if (to === "chat") {
         dispatch(popBackTo({ name: "history" }));
         dispatch(push({ name: "chat" }));
diff --git a/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx b/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx
index 89d26c095..ac8817713 100644
--- a/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx
+++ b/refact-agent/gui/src/components/UsageCounter/UsageCounter.tsx
@@ -66,31 +66,45 @@ const DefaultHoverCard: React.FC<{
   outputTokens: number;
 }> = ({ inputTokens, outputTokens }) => {
   const { currentThreadUsage } = useUsageCounter();
+  if (!currentThreadUsage) return null;
 
   return (
     
       
         Tokens spent per chat thread:
       
-      
-      {currentThreadUsage?.cache_read_input_tokens !== undefined && (
-        
+      {inputTokens !== 0 && (
+        
       )}
-      {currentThreadUsage?.cache_creation_input_tokens !== undefined && (
-        
+      {currentThreadUsage.cache_read_input_tokens !== undefined &&
+        currentThreadUsage.cache_read_input_tokens !== 0 && (
+          
+        )}
+      {currentThreadUsage.cache_creation_input_tokens !== undefined &&
+        currentThreadUsage.cache_creation_input_tokens !== 0 && (
+          
+        )}
+      {outputTokens !== 0 && (
+        
       )}
-      
-      {currentThreadUsage?.completion_tokens_details && (
-        
+      {currentThreadUsage.completion_tokens_details && (
+        <>
+          {currentThreadUsage.completion_tokens_details.reasoning_tokens !==
+            0 && (
+            
+          )}
+        
       )}
     
   );
@@ -111,18 +125,22 @@ const InlineHoverTriggerContent: React.FC<{ messageTokens: number }> = ({
 
 const DefaultHoverTriggerContent: React.FC<{
   inputTokens: number;
-  outputValue: string;
-}> = ({ inputTokens, outputValue }) => {
+  outputTokens: number;
+}> = ({ inputTokens, outputTokens }) => {
   return (
     <>
-      
-        
-        {formatNumberToFixed(inputTokens)}
-      
-      
-        
-        {outputValue}
-      
+      {inputTokens !== 0 && (
+        
+          
+          {formatNumberToFixed(inputTokens)}
+        
+      )}
+      {outputTokens !== 0 && (
+        
+          
+          {formatNumberToFixed(outputTokens)}
+        
+      )}
     
   );
 };
@@ -154,7 +172,10 @@ export const UsageCounter: React.FC = ({
     usage: currentThreadUsage,
     keys: ["completion_tokens"],
   });
-  const outputValue = formatNumberToFixed(outputTokens);
+
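+  // The default counter hides itself until the thread has actually consumed
+  // tokens; the inline variant always renders.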
+  const shouldUsageBeHidden = useMemo(() => {
+    return !isInline && inputTokens === 0 && outputTokens === 0;
+  }, [outputTokens, inputTokens, isInline]);
 
   useEffectOnce(() => {
     const handleScroll = (event: WheelEvent) => {
@@ -172,6 +193,8 @@ export const UsageCounter: React.FC = ({
     };
   });
 
+  if (shouldUsageBeHidden) return null;
+
   return (
     
       
@@ -187,7 +210,7 @@ export const UsageCounter: React.FC = ({
           ) : (
             
           )}
         
diff --git a/refact-agent/gui/src/events/index.ts b/refact-agent/gui/src/events/index.ts
index b6f987208..1fd4369c7 100644
--- a/refact-agent/gui/src/events/index.ts
+++ b/refact-agent/gui/src/events/index.ts
@@ -76,6 +76,7 @@ export {
   ideIsChatReady,
   ideToolCall,
   ideToolCallResponse,
+  ideSetCodeCompletionModel,
 } from "../hooks/useEventBusForIDE";
 
 export { ideAttachFileToChat } from "../hooks/useEventBusForApp";
diff --git a/refact-agent/gui/src/events/setup.ts b/refact-agent/gui/src/events/setup.ts
index e588fb61e..2fa1b99f1 100644
--- a/refact-agent/gui/src/events/setup.ts
+++ b/refact-agent/gui/src/events/setup.ts
@@ -21,20 +21,12 @@ export interface EnterpriseHost {
   apiKey: string;
 }
 
-export interface BringYourOwnKey {
-  type: "bring-your-own-key";
-}
-
 export interface ActionFromSetup {
   type: EVENT_NAMES_FROM_SETUP;
   payload?: Record;
 }
 
-export type HostSettings =
-  | CloudHost
-  | SelfHost
-  | EnterpriseHost
-  | BringYourOwnKey;
+export type HostSettings = CloudHost | SelfHost | EnterpriseHost;
 
 export function isActionFromSetup(action: unknown): action is ActionFromSetup {
   if (!action) return false;
diff --git a/refact-agent/gui/src/features/App.tsx b/refact-agent/gui/src/features/App.tsx
index aac76d736..218af159a 100644
--- a/refact-agent/gui/src/features/App.tsx
+++ b/refact-agent/gui/src/features/App.tsx
@@ -28,6 +28,7 @@ import { Tab } from "../components/Toolbar/Toolbar";
 import { PageWrapper } from "../components/PageWrapper";
 import { ThreadHistory } from "./ThreadHistory";
 import { Integrations } from "./Integrations";
+import { Providers } from "./Providers";
 import { UserSurvey } from "./UserSurvey";
 import { integrationsApi } from "../services/refact";
 import { KnowledgeList } from "./Knowledge";
@@ -202,6 +203,13 @@ export const InnerApp: React.FC = ({ style }: AppProps) => {
             handlePaddingShift={handlePaddingShift}
           />
         )}
+        {page.name === "providers page" && (
+          
+        )}
         {page.name === "thread history page" && (
            {
 
     await user.type(textarea, "hello");
 
-    await waitFor(() =>
-      app.queryByText(STUB_CAPS_RESPONSE.code_chat_default_model),
-    );
+    await waitFor(() => app.queryByText(STUB_CAPS_RESPONSE.chat_default_model));
 
     await user.keyboard("{Enter}");
 
@@ -294,7 +292,7 @@ describe("Chat", () => {
     // });
     await waitFor(() =>
       expect(
-        app.queryByText(STUB_CAPS_RESPONSE.code_chat_default_model),
+        app.queryByText(STUB_CAPS_RESPONSE.chat_default_model),
       ).not.toBeNull(),
     );
 
diff --git a/refact-agent/gui/src/features/Chat/Thread/actions.ts b/refact-agent/gui/src/features/Chat/Thread/actions.ts
index bbc08eca7..662b2742a 100644
--- a/refact-agent/gui/src/features/Chat/Thread/actions.ts
+++ b/refact-agent/gui/src/features/Chat/Thread/actions.ts
@@ -35,7 +35,11 @@ import { scanFoDuplicatesWith, takeFromEndWhile } from "../../../utils";
 import { debugApp } from "../../../debugConfig";
 import { ChatHistoryItem } from "../../History/historySlice";
 import { ideToolCallResponse } from "../../../hooks/useEventBusForIDE";
-import { capsApi } from "../../../services/refact";
+import {
+  capsApi,
+  DetailMessageWithErrorType,
+  isDetailMessage,
+} from "../../../services/refact";
 
 export const newChatAction = createAction | undefined>(
   "chatThread/new",
@@ -163,10 +167,6 @@ export const setIncreaseMaxTokens = createAction(
   "chatThread/setIncreaseMaxTokens",
 );
 
-export const setThreadPaused = createAction(
-  "chatThread/setThreadPaused",
-);
-
 // TODO: This is the circular dep when imported from hooks :/
 const createAppAsyncThunk = createAsyncThunk.withTypes<{
   state: RootState;
@@ -200,7 +200,7 @@ export const chatGenerateTitleThunk = createAppAsyncThunk<
   const caps = await thunkAPI
     .dispatch(capsApi.endpoints.getCaps.initiate(undefined))
     .unwrap();
-  const model = caps.code_chat_default_model;
+  const model = caps.chat_default_model;
   const messagesForLsp = formatMessagesForLsp([
     ...messagesToSend,
     {
@@ -355,9 +355,10 @@ export const chatAskQuestionThunk = createAppAsyncThunk<
       mode: realMode,
       boost_reasoning: boostReasoning,
     })
-      .then((response) => {
+      .then(async (response) => {
         if (!response.ok) {
-          return Promise.reject(new Error(response.statusText));
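+          // A non-OK response is expected to carry a JSON detail body; reject
+          // with it so the catch below can classify the error.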
+          const responseData = (await response.json()) as unknown;
+          return Promise.reject(responseData);
         }
         const reader = response.body?.getReader();
         if (!reader) return;
@@ -374,12 +375,22 @@ export const chatAskQuestionThunk = createAppAsyncThunk<
         };
         return consumeStream(reader, thunkAPI.signal, onAbort, onChunk);
       })
-      .catch((err: Error) => {
+      .catch((err: unknown) => {
         // console.log("Catch called");
+        const isError = err instanceof Error;
         thunkAPI.dispatch(doneStreaming({ id: chatId }));
-        thunkAPI.dispatch(chatError({ id: chatId, message: err.message }));
         thunkAPI.dispatch(fixBrokenToolMessages({ id: chatId }));
-        return thunkAPI.rejectWithValue(err.message);
+
+        const errorObject: DetailMessageWithErrorType = {
+          detail: isError
+            ? err.message
+            : isDetailMessage(err)
+              ? err.detail
+              : (err as string),
+          errorType: isError ? "CHAT" : "GLOBAL",
+        };
+
+        return thunkAPI.rejectWithValue(errorObject);
       })
       .finally(() => {
         thunkAPI.dispatch(setMaxNewTokens(DEFAULT_MAX_NEW_TOKENS));
diff --git a/refact-agent/gui/src/features/Chat/Thread/reducer.ts b/refact-agent/gui/src/features/Chat/Thread/reducer.ts
index 5510485f3..b12f9be70 100644
--- a/refact-agent/gui/src/features/Chat/Thread/reducer.ts
+++ b/refact-agent/gui/src/features/Chat/Thread/reducer.ts
@@ -233,6 +233,7 @@ export const chatReducer = createReducer(initialState, (builder) => {
   builder.addCase(doneStreaming, (state, action) => {
     if (state.thread.id !== action.payload.id) return state;
     state.streaming = false;
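+    // Finishing the stream also clears the waiting flag, so the UI does not
+    // keep showing a pending state.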
+    state.waiting_for_response = false;
     state.thread.read = true;
     state.prevent_send = false;
   });
@@ -428,13 +429,13 @@ export const chatReducer = createReducer(initialState, (builder) => {
   builder.addMatcher(
     capsApi.endpoints.getCaps.matchFulfilled,
     (state, action) => {
-      const defaultModel = action.payload.code_chat_default_model;
+      const defaultModel = action.payload.chat_default_model;
 
       const model = state.thread.model || defaultModel;
-      if (!(model in action.payload.code_chat_models)) return;
+      if (!(model in action.payload.chat_models)) return;
 
       const currentModelMaximumContextTokens =
-        action.payload.code_chat_models[model].n_ctx;
+        action.payload.chat_models[model].n_ctx;
 
       state.thread.currentMaximumContextTokens =
         currentModelMaximumContextTokens;
diff --git a/refact-agent/gui/src/features/Chat/Thread/utils.ts b/refact-agent/gui/src/features/Chat/Thread/utils.ts
index 827868b6f..80910f5d8 100644
--- a/refact-agent/gui/src/features/Chat/Thread/utils.ts
+++ b/refact-agent/gui/src/features/Chat/Thread/utils.ts
@@ -34,6 +34,7 @@ import {
   isUserResponse,
   ThinkingBlock,
   isToolCallMessage,
+  Usage,
 } from "../../../services/refact";
 import { parseOrElse } from "../../../utils";
 import { type LspChatMessage } from "../../../services/refact";
@@ -232,13 +233,32 @@ export function formatChatResponse(
 
   const currentUsage = response.usage;
 
-  if (currentUsage && response.choices.length === 0) {
+  if (currentUsage) {
     const lastAssistantIndex = lastIndexOf(messages, isAssistantMessage);
     if (lastAssistantIndex === -1) return messages;
 
+    const lastAssistantMessage = messages[lastAssistantIndex];
+    if (!isAssistantMessage(lastAssistantMessage)) return messages;
+
+    const maybeLastAssistantMessageUsage = lastAssistantMessage.usage;
+    let usageToStore = currentUsage;
+
+    if (
+      maybeLastAssistantMessageUsage &&
+      Object.entries(currentUsage).every(
+        ([key, value]) =>
+          maybeLastAssistantMessageUsage[key as keyof Usage] === value,
+      )
+    ) {
+      usageToStore = { ...maybeLastAssistantMessageUsage, ...currentUsage };
+    }
+
     return messages.map((message, index) =>
       index === lastAssistantIndex
-        ? { ...message, usage: currentUsage }
+        ? {
+            ...message,
+            usage: usageToStore,
+          }
         : message,
     );
   }
@@ -673,8 +693,7 @@ export function consumeStream(
       const str = decoder.decode(value);
       const maybeError = checkForDetailMessage(str);
       if (maybeError) {
-        const error = new Error(maybeError.detail);
-        throw error;
+        return Promise.reject(maybeError);
       }
     }
 
@@ -708,7 +727,9 @@ export function consumeStream(
 
       const maybeJsonString = delta.substring(6);
 
-      if (maybeJsonString === "[DONE]") return Promise.resolve();
+      if (maybeJsonString === "[DONE]") {
+        return Promise.resolve();
+      }
 
       if (maybeJsonString === "[ERROR]") {
         const errorMessage = "error from lsp";
@@ -726,7 +747,7 @@ export function consumeStream(
         const error = new Error(errorMessage);
         // eslint-disable-next-line no-console
         console.error(error);
-        throw error;
+        return Promise.reject(maybeErrorData);
       }
 
       const fallback = {};
diff --git a/refact-agent/gui/src/features/Login/LoginPage.tsx b/refact-agent/gui/src/features/Login/LoginPage.tsx
index e0bd8b27f..3bdefeb5d 100644
--- a/refact-agent/gui/src/features/Login/LoginPage.tsx
+++ b/refact-agent/gui/src/features/Login/LoginPage.tsx
@@ -200,32 +200,6 @@ export const LoginPage: React.FC = () => {
             
           
         
-        
-          Bring your own key
-          
-            
-              
-                
-                  Connect to any OpenAI or Huggingface style server.
-                  Separate endpoints and keys for chat, completion, and
-                  embedding.
-
-
-
-
-
); diff --git a/refact-agent/gui/src/features/Pages/pagesSlice.ts b/refact-agent/gui/src/features/Pages/pagesSlice.ts index b9270d13b..6a36242d8 100644 --- a/refact-agent/gui/src/features/Pages/pagesSlice.ts +++ b/refact-agent/gui/src/features/Pages/pagesSlice.ts @@ -38,6 +38,10 @@ export interface LoginPage { name: "login page"; } +export interface ProvidersPage { + name: "providers page"; +} + export interface IntegrationsSetupPage { name: "integrations page"; projectPath?: string; @@ -61,6 +65,7 @@ export type Page = | DocumentationSettingsPage | ChatThreadHistoryPage | IntegrationsSetupPage + | ProvidersPage | KnowledgeListPage | LoginPage; diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.module.css b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.module.css new file mode 100644 index 000000000..4a820f1a8 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.module.css @@ -0,0 +1,10 @@ +.providerCard { + cursor: pointer; + user-select: none; + transition: opacity 0.15s ease-in-out; +} + +.providerCardDisabled { + opacity: 0.5; + cursor: not-allowed; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.tsx b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.tsx new file mode 100644 index 000000000..8856ea871 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/ProviderCard.tsx @@ -0,0 +1,64 @@ +import React from "react"; +import { Card, Flex, Heading } from "@radix-ui/themes"; + +import { OnOffSwitch } from "../../../components/OnOffSwitch/OnOffSwitch"; +import { iconsMap } from "../icons/iconsMap"; + +import type { ConfiguredProvidersResponse } from "../../../services/refact"; + +import { getProviderName } from "../getProviderName"; +import { useProviderCard } from "./useProviderCard"; + +import styles from "./ProviderCard.module.css"; +import { useUpdateProvider } from "../useUpdateProvider"; +import classNames from "classnames"; + +export type ProviderCardProps = { + provider: ConfiguredProvidersResponse["providers"][number]; + setCurrentProvider: ( + provider: ConfiguredProvidersResponse["providers"][number], + ) => void; +}; + +export const ProviderCard: React.FC = ({ + provider, + setCurrentProvider, +}) => { + const { isUpdatingEnabledState } = useUpdateProvider({ + provider, + }); + + const { handleClickOnProvider, handleSwitchClick } = useProviderCard({ + provider, + setCurrentProvider, + }); + + return ( + + + + {iconsMap[provider.name]} + + {getProviderName(provider)} + + + {!provider.readonly && ( + + + + )} + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/index.ts b/refact-agent/gui/src/features/Providers/ProviderCard/index.ts new file mode 100644 index 000000000..705ad4825 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/index.ts @@ -0,0 +1 @@ +export { ProviderCard, type ProviderCardProps } from "./ProviderCard"; diff --git a/refact-agent/gui/src/features/Providers/ProviderCard/useProviderCard.ts b/refact-agent/gui/src/features/Providers/ProviderCard/useProviderCard.ts new file mode 100644 index 000000000..45ee65cfd --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderCard/useProviderCard.ts @@ -0,0 +1,32 @@ +import { type MouseEventHandler, useCallback } from "react"; +import { ProviderCardProps } from "./ProviderCard"; +import { useUpdateProvider } from "../useUpdateProvider"; + +export function useProviderCard({ + provider, + setCurrentProvider, +}: { + 
provider: ProviderCardProps["provider"]; + setCurrentProvider: ProviderCardProps["setCurrentProvider"]; +}) { + const { updateProviderEnabledState, isUpdatingEnabledState } = + useUpdateProvider({ provider }); + + const handleClickOnProvider = useCallback(() => { + if (isUpdatingEnabledState) return; + + setCurrentProvider(provider); + }, [setCurrentProvider, provider, isUpdatingEnabledState]); + + const handleSwitchClick: MouseEventHandler = (event) => { + if (isUpdatingEnabledState) return; + + event.stopPropagation(); + void updateProviderEnabledState(); + }; + + return { + handleClickOnProvider, + handleSwitchClick, + }; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/FormFields.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/FormFields.tsx new file mode 100644 index 000000000..4df26f5c3 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/FormFields.tsx @@ -0,0 +1,75 @@ +import { FC } from "react"; +import classNames from "classnames"; + +import { Flex, Select, TextField } from "@radix-ui/themes"; +import { toPascalCase } from "../../../utils/toPascalCase"; + +import type { Provider } from "../../../services/refact"; + +import styles from "./ProviderForm.module.css"; + +export type FormFieldsProps = { + providerData: Provider; + fields: Record; + onChange: (updatedProviderData: Provider) => void; +}; + +export const FormFields: FC = ({ + providerData, + fields, + onChange, +}) => { + return Object.entries(fields).map(([key, value], idx) => { + if (key === "endpoint_style" && providerData.name === "custom") { + const availableOptions: Provider["endpoint_style"][] = ["openai", "hf"]; + const displayValues = ["OpenAI", "HuggingFace"]; + return ( + + {toPascalCase(key)} + + onChange({ ...providerData, endpoint_style: value }) + } + disabled={providerData.readonly} + > + + + {availableOptions.map((option, idx) => ( + + {displayValues[idx]} + + ))} + + + + ); + } + + if (key === "endpoint_style") return null; + + if ( + !providerData.supports_completion && + (key === "completion_default_model" || key === "completion_endpoint") + ) { + return null; + } + + return ( + + + + onChange({ ...providerData, [key]: event.target.value }) + } + className={classNames({ + [styles.disabledField]: providerData.readonly, + })} + disabled={providerData.readonly} + /> + + ); + }); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.module.css b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.module.css new file mode 100644 index 000000000..92073748c --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.module.css @@ -0,0 +1,19 @@ +.button { + transition: all 0.1s ease-in-out; +} + +.extraButton { + width: 100%; + padding: 5px 0; +} +.disabledSwitch { + span { + background-color: rgba(255, 255, 255, 0.3); + } +} + +.disabledField { + input { + cursor: not-allowed; + } +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.tsx new file mode 100644 index 000000000..86e4a1b5d --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderForm.tsx @@ -0,0 +1,123 @@ +import React from "react"; +import classNames from "classnames"; +import { Button, Flex, Separator, Switch } from "@radix-ui/themes"; + +import { FormFields } from "./FormFields"; +import { Spinner } from "../../../components/Spinner"; + +import { useProviderForm } from "./useProviderForm"; +import type { 
Provider, SimplifiedProvider } from "../../../services/refact"; + +import { toPascalCase } from "../../../utils/toPascalCase"; +import { aggregateProviderFields } from "./utils"; + +import styles from "./ProviderForm.module.css"; +import { ProviderModelsList } from "./ProviderModelsList/ProviderModelsList"; + +export type ProviderFormProps = { + currentProvider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >; + isProviderConfigured: boolean; + isSaving: boolean; + handleDiscardChanges: () => void; + handleSaveChanges: (updatedProviderData: Provider) => void; +}; + +export const ProviderForm: React.FC = ({ + currentProvider, + isProviderConfigured, + isSaving, + handleDiscardChanges, + handleSaveChanges, +}) => { + const { + areShowingExtraFields, + formValues, + handleFormValuesChange, + isProviderLoadedSuccessfully, + setAreShowingExtraFields, + shouldSaveButtonBeDisabled, + } = useProviderForm({ providerName: currentProvider.name }); + + if (!isProviderLoadedSuccessfully || !formValues) return ; + + const { extraFields, importantFields } = aggregateProviderFields(formValues); + + return ( + + + + + + handleFormValuesChange({ ...formValues, ["enabled"]: checked }) + } + /> + + + + + + + {areShowingExtraFields && ( + + + + )} + + + + {isProviderConfigured && ( + + )} + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.module.css b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.module.css new file mode 100644 index 000000000..e61fee90d --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.module.css @@ -0,0 +1,5 @@ +.disabledCard { + opacity: 0.5; + pointer-events: none; + transition: 0.15s ease-in-out; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.tsx new file mode 100644 index 000000000..e5b07fb2f --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ModelCard.tsx @@ -0,0 +1,167 @@ +import { useCallback, useMemo, type FC } from "react"; +import classNames from "classnames"; +import { + Badge, + Card, + DropdownMenu, + Flex, + IconButton, + Text, +} from "@radix-ui/themes"; +import { DotsVerticalIcon } from "@radix-ui/react-icons"; + +import { ModelCardPopup } from "./components/ModelCardPopup"; +import { useModelDialogState } from "./hooks/useModelDialogState"; + +import type { ModelType, SimplifiedModel } from "../../../../services/refact"; + +import styles from "./ModelCard.module.css"; +import { useEventsBusForIDE } from "../../../../hooks"; + +export type ModelCardProps = { + model: SimplifiedModel; + providerName: string; + modelType: ModelType; + isReadonlyProvider: boolean; + currentModelNames: string[]; +}; + +/** + * Card component that displays model information and provides access to model settings + */ +export const ModelCard: FC = ({ + model, + modelType, + providerName, + isReadonlyProvider, + currentModelNames, +}) => { + const { enabled, name, removable, user_configured } = model; + const { + isOpen: dialogOpen, + setIsOpen: setDialogOpen, + dropdownOpen, + setDropdownOpen, + openDialogSafely, + isSavingModel, + handleToggleModelEnabledState, + handleRemoveModel, + handleResetModel, + handleSaveModel, + handleUpdateModel, + } = useModelDialogState({ + initialState: false, + modelType, + providerName, + }); + + const { 
setCodeCompletionModel } = useEventsBusForIDE(); + + const handleSetCompletionModelForIDE = useCallback(() => { + const formattedModelName = `${providerName}/${model.name}`; + setCodeCompletionModel(formattedModelName); + }, [model, providerName, setCodeCompletionModel]); + + const dropdownOptions = useMemo(() => { + const shouldOptionsBeDisabled = isReadonlyProvider || isSavingModel; + return [ + { + label: "Edit model's settings", + onClick: openDialogSafely, + visible: !shouldOptionsBeDisabled, + }, + { + label: enabled ? "Disable model" : "Enable model", + onClick: () => void handleToggleModelEnabledState(model), + visible: !shouldOptionsBeDisabled, + }, + { + label: "Reset model", + onClick: () => void handleResetModel(model), + visible: !removable && user_configured, + }, + { + label: "Remove model", + onClick: () => void handleRemoveModel({ model }), + visible: removable, + }, + { + label: "Use as completion model in IDE", + onClick: handleSetCompletionModelForIDE, + visible: modelType === "completion", + }, + ]; + }, [ + isReadonlyProvider, + isSavingModel, + enabled, + removable, + user_configured, + model, + modelType, + openDialogSafely, + handleToggleModelEnabledState, + handleResetModel, + handleRemoveModel, + handleSetCompletionModelForIDE, + ]); + + const dropdownOptionsCount = useMemo(() => { + return dropdownOptions.filter((option) => option.visible).length; + }, [dropdownOptions]); + + return ( + + {dialogOpen && ( + + )} + + + + + {name} + + + {enabled ? "Active" : "Inactive"} + + + + {dropdownOptionsCount > 0 && ( + + + + + + + + {dropdownOptions.map(({ label, visible, onClick }) => { + if (!visible) return null; + return ( + + {label} + + ); + })} + + + )} + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ProviderModelsList.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ProviderModelsList.tsx new file mode 100644 index 000000000..e36eccacb --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/ProviderModelsList.tsx @@ -0,0 +1,126 @@ +import { useCallback, type FC } from "react"; +import { Flex, Heading, Separator, Text } from "@radix-ui/themes"; + +import type { ProviderFormProps } from "../ProviderForm"; + +import { Spinner } from "../../../../components/Spinner"; +import { ModelCard } from "./ModelCard"; +import { AddModelButton } from "./components"; + +import { useGetModelsByProviderNameQuery } from "../../../../hooks/useModelsQuery"; +import { ModelsResponse } from "../../../../services/refact"; + +export type ProviderModelsListProps = { + provider: ProviderFormProps["currentProvider"]; +}; + +const NoModelsText: FC = () => { + return ( + + No models available, but you can add one by clicking 'Add model' + + ); +}; + +export const ProviderModelsList: FC = ({ + provider, +}) => { + const { + data: modelsData, + isSuccess, + isLoading, + } = useGetModelsByProviderNameQuery({ + providerName: provider.name, + }); + + const getModelNames = useCallback((modelsData: ModelsResponse) => { + const currentChatModelNames = modelsData.chat_models.map((m) => m.name); + const currentCompletionModelNames = modelsData.completion_models.map( + (m) => m.name, + ); + + return { + currentChatModelNames, + currentCompletionModelNames, + }; + }, []); + + if (isLoading) return ; + + if (!isSuccess) return
<Text>Something went wrong :/</Text>
; + + const { chat_models, completion_models } = modelsData; + + const { currentChatModelNames, currentCompletionModelNames } = + getModelNames(modelsData); + + return ( + + + Models list + + + + Chat Models + + {chat_models.length > 0 ? ( + chat_models.map((m) => { + return ( + + ); + }) + ) : ( + + )} + {!provider.readonly && ( + + )} + {provider.supports_completion && ( + <> + + Completion Models + + {completion_models.length > 0 ? ( + completion_models.map((m) => { + return ( + + ); + }) + ) : ( + + )} + {!provider.readonly && ( + + )} + + )} + {/* TODO: do we want to expose embedding model configuration updates? */} + {/* + Embedding Model + +
{modelsData.embedding_model.name}
*/} +
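+ {/* If embedding configuration is exposed later, a minimal sketch could mirror
+ the sections above; the embedding_model props passed to ModelCard here are
+ assumptions for illustration, not the component's confirmed API:
+
+ <Heading as="h4" size="3">Embedding Model</Heading>
+ <ModelCard
+ model={modelsData.embedding_model}
+ modelType="embedding"
+ providerName={provider.name}
+ isReadonlyProvider={provider.readonly}
+ currentModelNames={[]}
+ />
+ */}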
+ ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/AddModelButton.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/AddModelButton.tsx new file mode 100644 index 000000000..d539aafea --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/AddModelButton.tsx @@ -0,0 +1,54 @@ +import type { FC } from "react"; +import { useModelDialogState } from "../hooks"; +import { ModelType } from "../../../../../services/refact"; +import { ModelCardPopup } from "./ModelCardPopup"; +import { Button } from "@radix-ui/themes"; + +export type AddModelButtonProps = { + modelType: ModelType; + providerName: string; + currentModelNames: string[]; +}; + +export const AddModelButton: FC = ({ + modelType, + providerName, + currentModelNames, +}) => { + const { + isOpen, + setIsOpen, + isSavingModel, + handleSaveModel, + handleUpdateModel, + } = useModelDialogState({ + modelType, + providerName, + initialState: false, + }); + + return ( + <> + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/CapabilityBadge.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/CapabilityBadge.tsx new file mode 100644 index 000000000..07cc633e1 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/CapabilityBadge.tsx @@ -0,0 +1,38 @@ +import { Badge } from "@radix-ui/themes"; +import { CheckIcon, Cross1Icon } from "@radix-ui/react-icons"; +import { FC } from "react"; + +type CapabilityBadgeProps = { + name: string; + enabled: boolean; + displayValue?: string | null; + onClick?: () => void; + interactive?: boolean; +}; + +/** + * Reusable component for model capability badges + */ +export const CapabilityBadge: FC = ({ + name, + enabled, + onClick, + displayValue = null, + interactive = true, +}) => { + const icon = enabled ? ( + + ) : ( + + ); + + return ( + + {name} {displayValue ? 
displayValue : icon} + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormField.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormField.tsx new file mode 100644 index 000000000..326afd8fe --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormField.tsx @@ -0,0 +1,53 @@ +import { Text, TextField } from "@radix-ui/themes"; +import { FC, ReactNode } from "react"; +import { Markdown } from "../../../../../components/Markdown"; + +type FormFieldProps = { + label: string; + value?: string; + placeholder?: string; + description?: string; + type?: TextField.RootProps["type"]; + isDisabled?: boolean; + max?: string; + onChange?: React.ChangeEventHandler; + children?: ReactNode; +}; + +/** + * Reusable form field component with consistent styling + */ +export const FormField: FC = ({ + label, + value, + placeholder, + description, + isDisabled, + type, + max, + onChange, + children, +}) => { + return ( + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormSelect.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormSelect.tsx new file mode 100644 index 000000000..4f4ad421a --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/FormSelect.tsx @@ -0,0 +1,69 @@ +import { Flex, Select, Text } from "@radix-ui/themes"; +import { ReactNode } from "react"; + +type FormSelectProps = { + label: string; + options?: OptionType[]; + optionTransformer?: (option: OptionType) => OptionType; + value: string; + placeholder?: string; + description?: string; + isDisabled?: boolean; + onValueChange?: (value: string) => void; + children?: ReactNode; +}; + +/** + * Type for the options of the form select component + */ +export type OptionType = string | null; + +/** + * Reusable form select component with consistent styling + */ +export function FormSelect({ + label, + options, + value, + placeholder, + description, + isDisabled, + onValueChange, + optionTransformer, +}: FormSelectProps) { + return ( + + + {label} + + {description && ( + + {description} + + )} + + + + {options?.map((option) => { + if (option !== null) { + return ( + + {optionTransformer ? 
optionTransformer(option) : option}{" "} + + ); + } + return ( + + None + + ); + })} + + + + ); +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/ModelCardPopup.tsx b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/ModelCardPopup.tsx new file mode 100644 index 000000000..216a2fa93 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/ModelCardPopup.tsx @@ -0,0 +1,512 @@ +import type { FC } from "react"; +import React, { + useState, + useEffect, + useCallback, + ChangeEvent, + useMemo, +} from "react"; +import isEqual from "lodash.isequal"; +import { Button, Dialog, Flex, Text } from "@radix-ui/themes"; + +import { + useGetCompletionModelFamiliesQuery, + useGetModelConfiguration, + useGetModelDefaults, +} from "../../../../../hooks/useModelsQuery"; + +import { FormField } from "./FormField"; +import { CapabilityBadge } from "./CapabilityBadge"; + +import type { + CodeChatModel, + CodeCompletionModel, + EmbeddingModel, + Model, + ModelType, + SimplifiedModel, + SupportsReasoningStyle, +} from "../../../../../services/refact"; + +import { extractHumanReadableReasoningType } from "../utils"; +import { useEffectOnce } from "../../../../../hooks"; +import { FormSelect } from "./FormSelect"; +import { Spinner } from "../../../../../components/Spinner"; + +const SUPPORTED_REASONING_STYLES: SupportsReasoningStyle[] = [ + "openai", + "deepseek", + "anthropic", + null, +]; + +export type ModelCardPopupProps = { + minifiedModel?: SimplifiedModel; + isOpen: boolean; + isSaving: boolean; + setIsOpen: (state: boolean) => void; + onSave: (model: Model) => Promise; + onUpdate: ({ + model, + oldModel, + }: { + model: Model; + oldModel: SimplifiedModel; + }) => Promise; + modelName: string; + modelType: ModelType; + providerName: string; + currentModelNames: string[]; + newModelCreation?: boolean; + isRemovable?: boolean; +}; + +export const ModelCardPopup: FC = ({ + isOpen, + isSaving, + setIsOpen, + onSave, + onUpdate, + modelName, + modelType, + providerName, + minifiedModel, + currentModelNames, + newModelCreation = false, + isRemovable = false, +}) => { + const { + data: configuredModelData, + isSuccess: _isConfiguredModelDataLoaded, + currentData: configuredModelCurrentData, + } = useGetModelConfiguration({ + modelName, + modelType, + providerName, + }); + + const { data: defaultModelData, isSuccess: isDefaultModelDataLoaded } = + useGetModelDefaults({ + modelType, + providerName, + }); + const [editedModelData, setEditedModelData] = useState( + configuredModelData, + ); + + const areDefaultsUnavailable = useMemo(() => { + const dataToCompare = { + ...editedModelData, + name: "", + }; + return isEqual(defaultModelData, dataToCompare); + }, [defaultModelData, editedModelData]); + + const isSavingDisabled = useMemo(() => { + if (!editedModelData?.name) { + return true; + } + const isNameTaken = currentModelNames.some( + (existingName) => + existingName === editedModelData.name && existingName !== modelName, + ); + // TODO: maybe we should move it out somewhere :P + const REQUIRED_FIELD_KEYS = ["tokenizer", "n_ctx"]; + + const someFieldsNotFilled = Object.entries(editedModelData).some( + ([key, value]) => { + if (REQUIRED_FIELD_KEYS.includes(key)) { + if (!value) return true; + } + + return false; + }, + ); + + if (isNameTaken) return true; + + return isEqual(configuredModelData, editedModelData) || someFieldsNotFilled; + }, [configuredModelData, editedModelData, 
currentModelNames, modelName]); + + useEffect(() => { + if (isOpen) { + if (configuredModelData) { + setEditedModelData((prev) => { + if (isEqual(prev, configuredModelCurrentData)) return prev; + return configuredModelData; + }); + return; + } + setEditedModelData(defaultModelData); + } + }, [ + isOpen, + configuredModelData, + configuredModelCurrentData, + defaultModelData, + newModelCreation, + modelType, + ]); + + useEffectOnce(() => { + return () => { + setEditedModelData(undefined); + }; + }); + + const handleSetDefaultModelData = useCallback(() => { + if (!isDefaultModelDataLoaded) return; + const updatedData = { + ...defaultModelData, + name: newModelCreation ? defaultModelData.name : modelName, + }; + setEditedModelData(updatedData); + }, [isDefaultModelDataLoaded, newModelCreation, modelName, defaultModelData]); + + const handleSave = useCallback(async () => { + if (!isOpen || !editedModelData) return; + + let isSuccess: boolean; + + if (minifiedModel && minifiedModel.name !== editedModelData.name) { + isSuccess = await onUpdate({ + model: editedModelData, + oldModel: minifiedModel, + }); + } else { + isSuccess = await onSave(editedModelData); + } + if (!isSuccess) return; + + setTimeout(() => setIsOpen(false), 0); + }, [isOpen, editedModelData, minifiedModel, setIsOpen, onSave, onUpdate]); + + const handleCancel = useCallback(() => { + setTimeout(() => setIsOpen(false), 0); + }, [setIsOpen]); + + const handleDialogChange = useCallback( + (open: boolean) => { + setIsOpen(open); + }, + [setIsOpen], + ); + + const getValueByType = (value: string, valueType: string) => { + if (valueType === "string") return value; + if (valueType === "number") return parseFloat(value); + return value; + }; + + const updateFieldByKey = useCallback( + (key: string, value: string | number) => { + if (!editedModelData) return; + setEditedModelData({ + ...editedModelData, + [key]: value, + }); + }, + [editedModelData], + ); + + const handleFieldValueChange = useCallback( + (e: ChangeEvent, field: string) => { + const valueType = typeof editedModelData?.[field as keyof Model]; + const value = getValueByType(e.target.value, valueType); + updateFieldByKey(field, value); + }, + [editedModelData, updateFieldByKey], + ); + + // Toggle capability value + const toggleCapability = (key: string) => { + if (!editedModelData) return; + + setEditedModelData({ + ...editedModelData, + [key]: !editedModelData[key as keyof typeof editedModelData], + }); + }; + + if (!configuredModelData && !newModelCreation) { + return null; + } + + return ( + + + Model Configuration + + {!newModelCreation + ?
`Make changes to ${modelName} (${modelType} model)` + : `Set up new model for ${providerName} (${modelType} model)`} + + + + handleFieldValueChange(e, "name")} + placeholder="Model name" + isDisabled={!newModelCreation && !isRemovable} + /> + {editedModelData?.type === "completion" && ( + + )} + + {editedModelData?.type === "chat" && ( + + )} + + {editedModelData?.type === "embedding" && ( + + )} + + + + + + + + + + + + ); +}; + +type CompletionModelFieldsProps = { + editedModelData: CodeCompletionModel; + handleFieldValueChange: ( + e: ChangeEvent, + field: string, + ) => void; + updateFieldByKey: (key: string, value: string | number) => void; +}; + +const CompletionModelFields: FC = ({ + editedModelData, + handleFieldValueChange, + updateFieldByKey, +}) => { + const { + data: modelFamiliesData, + isSuccess, + isLoading, + } = useGetCompletionModelFamiliesQuery(); + if (isLoading || !isSuccess) return ; + + const aggregatedModelFamilies = [...modelFamiliesData.model_families, null]; + return ( + <> + handleFieldValueChange(e, "n_ctx")} + placeholder="Context window size" + type="number" + /> + updateFieldByKey("model_family", value)} + options={aggregatedModelFamilies} + /> + + ); +}; + +// Chat model specific fields +type ChatModelFieldsProps = { + editedModelData?: CodeChatModel; + setEditedModelData: (data: Model) => void; + toggleCapability: (key: string) => void; + handleFieldValueChange: ( + e: ChangeEvent, + field: string, + ) => void; +}; + +const ChatModelFields: FC = ({ + editedModelData, + setEditedModelData, + toggleCapability, + handleFieldValueChange, +}) => { + const handleTemperatureChange = (e: React.ChangeEvent) => { + if (!editedModelData) return; + const value = parseFloat(e.target.value); + // Count digits excluding the decimal point; anything above 1 or longer than 8 digits resets to "1" + const digits = e.target.value + .split("") + .filter((s) => s !== "."); + + if (value > 1 || digits.length > 8) { + e.target.value = "1"; + } + + setEditedModelData({ + ...editedModelData, + type: "chat", + default_temperature: + e.target.value === "" ? null : Math.min(parseFloat(e.target.value), 1), + }); + }; + + const handleReasoningStyleChange = (value: string) => { + if (!editedModelData) return; + + setEditedModelData({ + ...editedModelData, + type: "chat", + supports_boost_reasoning: + value === "null" ? false : editedModelData.supports_boost_reasoning, + supports_reasoning: + value === "null" ?
null : (value as SupportsReasoningStyle), + }); + }; + + if (!editedModelData) return null; + + return ( + <> + handleFieldValueChange(e, "n_ctx")} + placeholder="Context window size" + type="number" + /> + handleFieldValueChange(e, "tokenizer")} + placeholder="Tokenizer name" + /> + + + + + + Capabilities + + + toggleCapability("supports_tools")} + /> + toggleCapability("supports_multimodality")} + /> + toggleCapability("supports_clicks")} + /> + toggleCapability("supports_agent")} + /> + {editedModelData.supports_reasoning && ( + toggleCapability("supports_boost_reasoning")} + /> + )} + + + + ); +}; + +// Embedding model specific fields +type EmbeddingModelFieldsProps = { + editedModelData: EmbeddingModel; + handleFieldValueChange: ( + e: ChangeEvent, + field: string, + ) => void; +}; + +const EmbeddingModelFields: FC = ({ + editedModelData, + handleFieldValueChange, +}) => { + return ( + <> + handleFieldValueChange(e, "embedding_size")} + placeholder="Embedding size" + type="number" + /> + handleFieldValueChange(e, "rejection_threshold")} + placeholder="Rejection threshold" + type="number" + /> + handleFieldValueChange(e, "embedding_batch")} + placeholder="Embedding batch" + type="number" + /> + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/index.ts new file mode 100644 index 000000000..f53752ece --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/components/index.ts @@ -0,0 +1,4 @@ +export * from "./CapabilityBadge"; +export * from "./FormField"; +export * from "./ModelCardPopup"; +export * from "./AddModelButton"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/index.ts new file mode 100644 index 000000000..359b7d586 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/index.ts @@ -0,0 +1 @@ +export * from "./useModelDialogState"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/useModelDialogState.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/useModelDialogState.ts new file mode 100644 index 000000000..70eafb75c --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/hooks/useModelDialogState.ts @@ -0,0 +1,241 @@ +import { useState, useCallback } from "react"; + +import { + useDeleteModelMutation, + useGetLazyModelConfiguration, + useUpdateModelMutation, +} from "../../../../../hooks/useModelsQuery"; +import { useAppDispatch } from "../../../../../hooks"; + +import { setInformation } from "../../../../Errors/informationSlice"; +import { setError } from "../../../../Errors/errorsSlice"; + +import { modelsApi } from "../../../../../services/refact"; +import type { + Model, + ModelType, + SimplifiedModel, +} from "../../../../../services/refact"; + +/** + * Custom hook for managing model dialog state with body style reset functionality + */ +export const useModelDialogState = ({ + modelType, + providerName, + initialState = false, +}: { + modelType: ModelType; + providerName: string; + initialState?: boolean; +}) => { + const dispatch = useAppDispatch(); + + const [isOpen, setIsOpenState] = useState(initialState); + const [isSavingModel, setIsSavingModel] = useState(false); + const [isRemovingModel, 
setIsRemovingModel] = useState(false); + const [dropdownOpen, setDropdownOpen] = useState(false); + + const getModelData = useGetLazyModelConfiguration(); + const updateModel = useUpdateModelMutation(); + const deleteModel = useDeleteModelMutation(); + + const resetBodyStyles = useCallback(() => { + document.body.style.pointerEvents = ""; + }, []); + + const setIsOpen = useCallback( + (state: boolean) => { + setIsOpenState(state); + if (!state) { + resetBodyStyles(); + } + }, + [resetBodyStyles], + ); + + const openDialogSafely = useCallback(() => { + setDropdownOpen(false); + // Using a small timeout to avoid style conflicts + setTimeout(() => { + setIsOpenState(true); + }, 10); + }, []); + + const handleToggleModelEnabledState = useCallback( + async (model: SimplifiedModel) => { + setIsSavingModel(true); + const { data: modelData } = await getModelData({ + providerName, + modelName: model.name, + modelType: modelType, + }); + + if (!modelData) { + setIsSavingModel(false); + return; + } + + const enabled = modelData.enabled; + + const response = await updateModel({ + model: { + ...modelData, + enabled: !enabled, + }, + provider: providerName, + type: modelType, + }); + + if (response.error) { + dispatch( + setError( + `Error occurred on ${enabled ? "disabling" : "enabling"} ${ + model.name + } configuration. Check if your model configuration is correct`, + ), + ); + setIsSavingModel(false); + return; + } + + const actions = [ + setInformation( + `Model ${model.name} ${ + enabled ? "disabled" : "enabled" + } successfully!`, + ), + modelsApi.util.invalidateTags(["MODELS", "MODEL"]), + ]; + + actions.forEach((action) => dispatch(action)); + setIsSavingModel(false); + }, + [dispatch, getModelData, updateModel, modelType, providerName], + ); + + const handleRemoveModel = useCallback( + async ({ + model, + operationType = "remove", + isSilent = false, + }: { + model: SimplifiedModel; + operationType?: "remove" | "reset"; + isSilent?: boolean; + }) => { + setIsRemovingModel(true); + const response = await deleteModel({ + model: model.name, + provider: providerName, + type: modelType, + }); + + if (response.error) { + dispatch( + setError( + `Something went wrong during ${ + operationType === "remove" ? "removal" : "reset" + } of ${model.name} model. Please, try again`, + ), + ); + setIsRemovingModel(false); + return false; + } + + if (!isSilent) { + dispatch( + setInformation( + `Model ${model.name} was ${ + operationType === "remove" ? "removed" : "reset" + } successfully!`, + ), + ); + } + + dispatch(modelsApi.util.invalidateTags(["MODELS"])); + setIsRemovingModel(false); + return true; + }, + [dispatch, deleteModel, providerName, modelType], + ); + + const handleResetModel = useCallback( + async (model: SimplifiedModel) => { + const isSuccess = await handleRemoveModel({ + model, + operationType: "reset", + }); + if (isSuccess) { + dispatch(modelsApi.util.invalidateTags(["MODELS"])); + } + }, + [dispatch, handleRemoveModel], + ); + + const handleSaveModel = useCallback( + async (modelData: Model) => { + setIsSavingModel(true); + const response = await updateModel({ + model: modelData, + provider: providerName, + type: modelType, + }); + + if (response.error) { + dispatch( + setError( + `Something went wrong during update of ${modelData.name} model. 
Please, try again`, + ), + ); + setIsSavingModel(false); + return false; + } + const actions = [ + setInformation(`Model ${modelData.name} was updated successfully!`), + modelsApi.util.invalidateTags(["MODELS"]), + ]; + + actions.forEach((action) => dispatch(action)); + setIsSavingModel(false); + return true; + }, + [dispatch, setIsSavingModel, providerName, modelType, updateModel], + ); + + const handleUpdateModel = useCallback( + async ({ + model, + oldModel, + }: { + model: Model; + oldModel: SimplifiedModel; + }) => { + const removeResult = await handleRemoveModel({ + model: oldModel, + isSilent: true, + }); + if (!removeResult) return false; + const updateResult = await handleSaveModel(model); + return updateResult; + }, + [handleSaveModel, handleRemoveModel], + ); + + return { + isOpen, + isSavingModel, + isRemovingModel, + setIsRemovingModel, + setIsSavingModel, + setIsOpen, + dropdownOpen, + setDropdownOpen, + openDialogSafely, + resetBodyStyles, + handleSaveModel, + handleRemoveModel, + handleResetModel, + handleUpdateModel, + handleToggleModelEnabledState, + }; +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/index.ts new file mode 100644 index 000000000..962a6b817 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/index.ts @@ -0,0 +1 @@ +export { ProviderModelsList } from "./ProviderModelsList"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/extractHumanReadableReasoningType.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/extractHumanReadableReasoningType.ts new file mode 100644 index 000000000..e1b2020aa --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/extractHumanReadableReasoningType.ts @@ -0,0 +1,26 @@ +import { SupportsReasoningStyle } from "../../../../../services/refact"; +import { BEAUTIFUL_PROVIDER_NAMES } from "../../../constants"; + +export function isSupportsReasoningStyle( + data: string | null, +): data is SupportsReasoningStyle { + return ( + data === "openai" || + data === "anthropic" || + data === "deepseek" || + data === null + ); +} + +export function extractHumanReadableReasoningType( + reasoningType: string | null, +) { + if (!isSupportsReasoningStyle(reasoningType)) return null; + if (!reasoningType) return null; + + const maybeReadableReasoningType = BEAUTIFUL_PROVIDER_NAMES[reasoningType]; + + return maybeReadableReasoningType + ? 
maybeReadableReasoningType + : reasoningType; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/index.ts new file mode 100644 index 000000000..94393d143 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/ProviderModelsList/utils/index.ts @@ -0,0 +1 @@ +export * from "./extractHumanReadableReasoningType"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/index.ts b/refact-agent/gui/src/features/Providers/ProviderForm/index.ts new file mode 100644 index 000000000..a7d16ae6c --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/index.ts @@ -0,0 +1 @@ +export { ProviderForm } from "./ProviderForm"; diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/useProviderForm.ts b/refact-agent/gui/src/features/Providers/ProviderForm/useProviderForm.ts new file mode 100644 index 000000000..ff4ed57ad --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/useProviderForm.ts @@ -0,0 +1,54 @@ +import isEqual from "lodash.isequal"; +import { useCallback, useEffect, useMemo, useState } from "react"; +import type { Provider } from "../../../services/refact"; +import { + useGetConfiguredProvidersQuery, + useGetProviderQuery, +} from "../../../hooks/useProvidersQuery"; + +export function useProviderForm({ providerName }: { providerName: string }) { + const { data: detailedProvider, isSuccess: isProviderLoadedSuccessfully } = + useGetProviderQuery({ + providerName: providerName, + }); + const { data: configuredProviders } = useGetConfiguredProvidersQuery(); + + const [formValues, setFormValues] = useState(null); + const [areShowingExtraFields, setAreShowingExtraFields] = useState(false); + + useEffect(() => { + if (detailedProvider) { + setFormValues(detailedProvider); + } + }, [detailedProvider]); + + const shouldSaveButtonBeDisabled = useMemo(() => { + if (!detailedProvider) return true; + + const isProviderConfigured = configuredProviders?.providers.some( + (p) => p.name === providerName, + ); + if (!isProviderConfigured) return false; + + return detailedProvider.readonly || isEqual(formValues, detailedProvider); + }, [configuredProviders, detailedProvider, formValues, providerName]); + + const handleFormValuesChange = useCallback( + (updatedProviderData: Provider) => { + setFormValues(updatedProviderData); + }, + [], + ); + + return { + formValues, + setFormValues, + areShowingExtraFields, + setAreShowingExtraFields, + shouldSaveButtonBeDisabled, + handleFormValuesChange, + configuredProviders, + detailedProvider, + isProviderLoadedSuccessfully, + }; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderForm/utils.ts b/refact-agent/gui/src/features/Providers/ProviderForm/utils.ts new file mode 100644 index 000000000..3157bd6a4 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderForm/utils.ts @@ -0,0 +1,40 @@ +import type { Provider } from "../../../services/refact"; + +export type AggregatedProviderFields = { + importantFields: Record; + extraFields: Record; +}; + +const EXTRA_FIELDS_KEYS = [ + "embedding_endpoint", + "completion_endpoint", + "chat_endpoint", + "tokenizer_api_key", +]; +const HIDDEN_FIELDS_KEYS = [ + "name", + "readonly", + "enabled", + "supports_completion", +]; + +export function aggregateProviderFields(providerData: Provider) { + return Object.entries(providerData).reduce( + (acc, [key, value]) => { + const stringValue = value; + + if 
(HIDDEN_FIELDS_KEYS.some((hiddenField) => hiddenField === key)) { + return acc; + } + + if (EXTRA_FIELDS_KEYS.some((extraField) => extraField === key)) { + acc.extraFields[key] = stringValue; + } else { + acc.importantFields[key] = stringValue; + } + + return acc; + }, + { importantFields: {}, extraFields: {} }, + ); +} diff --git a/refact-agent/gui/src/features/Providers/ProviderPreview/ProviderPreview.tsx b/refact-agent/gui/src/features/Providers/ProviderPreview/ProviderPreview.tsx new file mode 100644 index 000000000..0a4be0bc5 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderPreview/ProviderPreview.tsx @@ -0,0 +1,68 @@ +import React from "react"; +import { Flex, Heading } from "@radix-ui/themes"; + +import { ProviderForm } from "../ProviderForm"; + +import { useProviderPreview } from "./useProviderPreview"; +import { getProviderName } from "../getProviderName"; + +import type { SimplifiedProvider } from "../../../services/refact"; +import { DeletePopover } from "../../../components/DeletePopover"; + +export type ProviderPreviewProps = { + configuredProviders: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >[]; + currentProvider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >; + handleSetCurrentProvider: ( + provider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + > | null, + ) => void; +}; + +export const ProviderPreview: React.FC = ({ + configuredProviders, + currentProvider, + handleSetCurrentProvider, +}) => { + const { + handleDiscardChanges, + handleSaveChanges, + handleDeleteProvider, + isDeletingProvider, + isSavingProvider, + } = useProviderPreview(handleSetCurrentProvider); + + return ( + + + + {getProviderName(currentProvider)} Configuration + + + void handleDeleteProvider(providerName) + } + /> + + + void handleSaveChanges(updatedProviderData) + } + isSaving={isSavingProvider} + isProviderConfigured={configuredProviders.some( + (p) => p.name === currentProvider.name, + )} + handleDiscardChanges={handleDiscardChanges} + /> + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProviderPreview/index.ts b/refact-agent/gui/src/features/Providers/ProviderPreview/index.ts new file mode 100644 index 000000000..cd2ff1f29 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderPreview/index.ts @@ -0,0 +1 @@ +export { ProviderPreview } from "./ProviderPreview"; diff --git a/refact-agent/gui/src/features/Providers/ProviderPreview/useProviderPreview.ts b/refact-agent/gui/src/features/Providers/ProviderPreview/useProviderPreview.ts new file mode 100644 index 000000000..1814e85be --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderPreview/useProviderPreview.ts @@ -0,0 +1,94 @@ +import { useCallback, useState } from "react"; + +import { useAppDispatch } from "../../../hooks"; +import { + useDeleteProviderMutation, + useUpdateProviderMutation, +} from "../../../hooks/useProvidersQuery"; + +import { setInformation } from "../../Errors/informationSlice"; +import { providersApi } from "../../../services/refact"; + +import { getProviderName } from "../getProviderName"; + +import type { Provider, SimplifiedProvider } from "../../../services/refact"; + +export function useProviderPreview( + handleSetCurrentProvider: ( + provider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + > | null, + ) => void, +) { + const dispatch = useAppDispatch(); + + const [isSavingProvider, setIsSavingProvider] = useState(false); + 
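// Local flags back the Save/Delete button spinners; the provider data itself +
// is refetched via RTK Query tag invalidation / resetApiState further below. +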
const [isDeletingProvider, setIsDeletingProvider] = useState(false); + + const updateProvider = useUpdateProviderMutation(); + const deleteProvider = useDeleteProviderMutation(); + + const handleSaveChanges = useCallback( + async (updatedProviderData: Provider) => { + setIsSavingProvider(true); + const response = await updateProvider(updatedProviderData); + if (response.error) { + setIsSavingProvider(false); + return; + } + const actions = [ + setInformation( + `Provider ${getProviderName( + updatedProviderData, + )} updated successfully`, + ), + providersApi.util.invalidateTags([ + "PROVIDER", + { type: "CONFIGURED_PROVIDERS", id: "LIST" }, + ]), + ]; + actions.forEach((action) => dispatch(action)); + setIsSavingProvider(false); + }, + [dispatch, updateProvider], + ); + + const handleDeleteProvider = useCallback( + async (providerName: string) => { + setIsDeletingProvider(true); + const response = await deleteProvider(providerName); + + if (response.error) { + setIsDeletingProvider(false); + return; + } + + const actions = [ + setInformation( + `${getProviderName( + providerName, + )}'s Provider configuration was deleted successfully`, + ), + providersApi.util.resetApiState(), + ]; + + actions.forEach((action) => dispatch(action)); + handleSetCurrentProvider(null); + setIsDeletingProvider(false); + }, + [dispatch, deleteProvider, handleSetCurrentProvider], + ); + + const handleDiscardChanges = useCallback(() => { + handleSetCurrentProvider(null); + }, [handleSetCurrentProvider]); + + return { + updateProvider, + handleDeleteProvider, + handleDiscardChanges, + handleSaveChanges, + isSavingProvider, + isDeletingProvider, + }; +} diff --git a/refact-agent/gui/src/features/Providers/ProviderUpdateContext.tsx b/refact-agent/gui/src/features/Providers/ProviderUpdateContext.tsx new file mode 100644 index 000000000..348b01cb0 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProviderUpdateContext.tsx @@ -0,0 +1,57 @@ +import React, { + createContext, + useContext, + useState, + ReactNode, + useMemo, + useCallback, +} from "react"; + +type ProviderUpdateState = { + updatingProviders: Record; + setProviderUpdating: (providerName: string, isUpdating: boolean) => void; +}; + +const ProviderUpdateContext = createContext( + undefined, +); + +export const ProviderUpdateProvider: React.FC<{ children: ReactNode }> = ({ + children, +}) => { + const [updatingProviders, setUpdatingProviders] = useState< + Record + >({}); + + const setProviderUpdating = useCallback( + (providerName: string, isUpdating: boolean) => { + setUpdatingProviders((prev) => ({ + ...prev, + [providerName]: isUpdating, + })); + }, + [], + ); + + const value = useMemo( + () => ({ updatingProviders, setProviderUpdating }), + [updatingProviders, setProviderUpdating], + ); + + return ( + + {children} + + ); +}; + +// eslint-disable-next-line react-refresh/only-export-components +export const useProviderUpdateContext = (): ProviderUpdateState => { + const context = useContext(ProviderUpdateContext); + if (context === undefined) { + throw new Error( + "useProviderUpdateContext must be used within a ProviderUpdateProvider", + ); + } + return context; +}; diff --git a/refact-agent/gui/src/features/Providers/Providers.tsx b/refact-agent/gui/src/features/Providers/Providers.tsx new file mode 100644 index 000000000..5b6b83413 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/Providers.tsx @@ -0,0 +1,68 @@ +import React from "react"; +import { Flex, Button } from "@radix-ui/themes"; +import { ArrowLeftIcon } from 
"@radix-ui/react-icons"; + +import { ScrollArea } from "../../components/ScrollArea"; +import { PageWrapper } from "../../components/PageWrapper"; +import { Spinner } from "../../components/Spinner"; +import { ProvidersView } from "./ProvidersView"; +import { ProviderUpdateProvider } from "./ProviderUpdateContext"; + +import { useGetConfiguredProvidersQuery } from "../../hooks/useProvidersQuery"; + +import type { Config } from "../Config/configSlice"; + +export type ProvidersProps = { + backFromProviders: () => void; + host: Config["host"]; + tabbed: Config["tabbed"]; +}; +export const Providers: React.FC = ({ + backFromProviders, + host, + tabbed, +}) => { + const { data: configuredProvidersData, isSuccess } = + useGetConfiguredProvidersQuery(); + + if (!isSuccess) return ; + return ( + + {host === "vscode" && !tabbed ? ( + + + + ) : ( + + )} + + + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/ConfiguredProvidersView.tsx b/refact-agent/gui/src/features/Providers/ProvidersView/ConfiguredProvidersView.tsx new file mode 100644 index 000000000..d142abe7f --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/ConfiguredProvidersView.tsx @@ -0,0 +1,81 @@ +import React from "react"; + +import { Button, Flex, Heading, Select, Text } from "@radix-ui/themes"; +import { ProviderCard } from "../ProviderCard/ProviderCard"; + +import type { ConfiguredProvidersResponse } from "../../../services/refact"; +import { getProviderName } from "../getProviderName"; +import { useGetConfiguredProvidersView } from "./useConfiguredProvidersView"; + +export type ConfiguredProvidersViewProps = { + configuredProviders: ConfiguredProvidersResponse["providers"]; + handleSetCurrentProvider: ( + provider: ConfiguredProvidersResponse["providers"][number], + ) => void; +}; + +export const ConfiguredProvidersView: React.FC< + ConfiguredProvidersViewProps +> = ({ configuredProviders, handleSetCurrentProvider }) => { + const { + handleAddNewProvider, + handlePotentialCurrentProvider, + notConfiguredProviderTemplates, + sortedConfiguredProviders, + potentialCurrentProvider, + } = useGetConfiguredProvidersView({ + configuredProviders, + handleSetCurrentProvider, + }); + + return ( + + + + + Configured Providers + + + Here you can navigate through the list of configured and available + providers + + + {sortedConfiguredProviders.map((provider, idx) => ( + + ))} + + {notConfiguredProviderTemplates.length > 0 && ( + + + Add new provider + + + + + {notConfiguredProviderTemplates.map((provider) => { + return ( + + {getProviderName(provider)} + + ); + })} + + + {potentialCurrentProvider && ( + + )} + + )} + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.module.css b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.module.css new file mode 100644 index 000000000..42ef91e28 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.module.css @@ -0,0 +1,14 @@ +.popup { + position: fixed; + max-width: max-content; + width: 80%; + left: 50%; + transform: translateX(-50%); + background-color: var(--accent-3); + bottom: 65px; +} + +/* styles for IDEs (padding for pages varies on config.host (vscode, jetbrains, web) */ +.popup_ide { + width: calc(100vw - var(--space-2) * 2); +} diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.tsx b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.tsx new file mode 100644 index 000000000..ec24f3fc7 --- /dev/null 
+++ b/refact-agent/gui/src/features/Providers/ProvidersView/ProvidersView.tsx @@ -0,0 +1,94 @@ +import React, { useCallback, useState } from "react"; +import { Flex } from "@radix-ui/themes"; + +import { ConfiguredProvidersView } from "./ConfiguredProvidersView"; + +import type { + ConfiguredProvidersResponse, + SimplifiedProvider, +} from "../../../services/refact"; +import { ProviderPreview } from "../ProviderPreview"; +import { + ErrorCallout, + InformationCallout, +} from "../../../components/Callout/Callout"; +import classNames from "classnames"; +import { useAppDispatch, useAppSelector } from "../../../hooks"; +import { clearError, getErrorMessage } from "../../Errors/errorsSlice"; +import { + clearInformation, + getInformationMessage, +} from "../../Errors/informationSlice"; + +import styles from "./ProvidersView.module.css"; +import { selectConfig } from "../../Config/configSlice"; + +export type ProvidersViewProps = { + configuredProviders: ConfiguredProvidersResponse["providers"]; +}; + +export const ProvidersView: React.FC = ({ + configuredProviders, +}) => { + const dispatch = useAppDispatch(); + + const currentHost = useAppSelector(selectConfig).host; + const globalError = useAppSelector(getErrorMessage); + const information = useAppSelector(getInformationMessage); + + const [currentProvider, setCurrentProvider] = useState | null>(null); + const handleSetCurrentProvider = useCallback( + ( + provider: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + > | null, + ) => { + setCurrentProvider(provider); + }, + [], + ); + + return ( + + {!currentProvider && ( + + )} + {currentProvider && ( + + )} + {information && ( + dispatch(clearInformation())} + className={classNames(styles.popup, { + [styles.popup_ide]: currentHost !== "web", + })} + > + {information} + + )} + {globalError && ( + dispatch(clearError())} + className={classNames(styles.popup, { + [styles.popup_ide]: currentHost !== "web", + })} + > + {globalError} + + )} + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/index.ts b/refact-agent/gui/src/features/Providers/ProvidersView/index.ts new file mode 100644 index 000000000..c17996029 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/index.ts @@ -0,0 +1,2 @@ +export { ProvidersView } from "./ProvidersView"; +export { ConfiguredProvidersView } from "./ConfiguredProvidersView"; diff --git a/refact-agent/gui/src/features/Providers/ProvidersView/useConfiguredProvidersView.tsx b/refact-agent/gui/src/features/Providers/ProvidersView/useConfiguredProvidersView.tsx new file mode 100644 index 000000000..ade797242 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/ProvidersView/useConfiguredProvidersView.tsx @@ -0,0 +1,84 @@ +import { useCallback, useEffect, useMemo, useState } from "react"; +import type { SimplifiedProvider } from "../../../services/refact"; +import { useGetProviderTemplatesQuery } from "../../../hooks/useProvidersQuery"; +import { ConfiguredProvidersViewProps } from "./ConfiguredProvidersView"; + +export function useGetConfiguredProvidersView({ + configuredProviders, + handleSetCurrentProvider, +}: { + configuredProviders: ConfiguredProvidersViewProps["configuredProviders"]; + handleSetCurrentProvider: ConfiguredProvidersViewProps["handleSetCurrentProvider"]; +}) { + const { data: providerTemplatesData } = useGetProviderTemplatesQuery(); + + const notConfiguredProviderTemplates = useMemo(() => { + return providerTemplatesData + ? 
providerTemplatesData.provider_templates.reduce< + SimplifiedProvider<"name">[] + >((acc, provider) => { + if (!configuredProviders.some((p) => p.name === provider.name)) + acc.push(provider); + return acc; + }, []) + : []; + }, [configuredProviders, providerTemplatesData]); + + const [potentialCurrentProvider, setPotentialCurrentProvider] = useState< + SimplifiedProvider<"name"> | undefined + >(notConfiguredProviderTemplates[0] || undefined); + + const sortedConfiguredProviders = useMemo(() => { + return [...configuredProviders].sort((a, b) => { + const getPriority = (provider: { name: string }) => { + if ( + provider.name === "refact" || + provider.name === "refact_self_hosted" + ) + return 0; + if (provider.name === "custom") return 2; + return 1; + }; + + const priorityA = getPriority(a); + const priorityB = getPriority(b); + + if (priorityA !== priorityB) { + return priorityA - priorityB; + } + + return a.name.localeCompare(b.name); + }); + }, [configuredProviders]); + + const handlePotentialCurrentProvider = useCallback((value: string) => { + setPotentialCurrentProvider({ + name: value, + }); + }, []); + + const handleAddNewProvider = useCallback(() => { + if (!potentialCurrentProvider) return; + + handleSetCurrentProvider({ + name: potentialCurrentProvider.name, + enabled: true, + readonly: false, + supports_completion: false, + }); + }, [handleSetCurrentProvider, potentialCurrentProvider]); + + useEffect(() => { + if (notConfiguredProviderTemplates.length > 0) { + setPotentialCurrentProvider(notConfiguredProviderTemplates[0]); + } + }, [notConfiguredProviderTemplates]); + + return { + handlePotentialCurrentProvider, + handleAddNewProvider, + sortedConfiguredProviders, + notConfiguredProviderTemplates, + potentialCurrentProvider, + }; +} diff --git a/refact-agent/gui/src/features/Providers/constants.ts b/refact-agent/gui/src/features/Providers/constants.ts new file mode 100644 index 000000000..1ac6e76b1 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/constants.ts @@ -0,0 +1,14 @@ +export const BEAUTIFUL_PROVIDER_NAMES: Record = { + refact: "Refact Cloud", + refact_self_hosted: "Refact Self-Hosted", + openai: "OpenAI", + openrouter: "OpenRouter", + groq: "Groq", // not sure about this one + anthropic: "Anthropic", + deepseek: "DeepSeek", + google_gemini: "Google Gemini", + ollama: "Ollama", + lmstudio: "LM Studio", + xai: "xAI", + custom: "Custom Provider", +}; diff --git a/refact-agent/gui/src/features/Providers/getProviderName.ts b/refact-agent/gui/src/features/Providers/getProviderName.ts new file mode 100644 index 000000000..a299e27fd --- /dev/null +++ b/refact-agent/gui/src/features/Providers/getProviderName.ts @@ -0,0 +1,10 @@ +import type { SimplifiedProvider } from "../../services/refact"; +import { BEAUTIFUL_PROVIDER_NAMES } from "./constants"; + +export function getProviderName(provider: SimplifiedProvider | string): string { + if (typeof provider === "string") return BEAUTIFUL_PROVIDER_NAMES[provider]; + const maybeName = provider.name; + if (!maybeName) return "Unknown Provider"; // TODO: throw error or think through it more + const beautyName = BEAUTIFUL_PROVIDER_NAMES[maybeName] as string | undefined; + return beautyName ? 
beautyName : maybeName; +} diff --git a/refact-agent/gui/src/features/Providers/icons/Anthropic.tsx b/refact-agent/gui/src/features/Providers/icons/Anthropic.tsx new file mode 100644 index 000000000..db5806577 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Anthropic.tsx @@ -0,0 +1,20 @@ +import { FC, SVGProps } from "react"; + +export const AnthropicIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Custom.tsx b/refact-agent/gui/src/features/Providers/icons/Custom.tsx new file mode 100644 index 000000000..1f29774f6 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Custom.tsx @@ -0,0 +1,17 @@ +import { FC, SVGProps } from "react"; + +export const CustomIcon: FC> = (props) => { + return ( + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/DeepSeek.tsx b/refact-agent/gui/src/features/Providers/icons/DeepSeek.tsx new file mode 100644 index 000000000..73416f3fd --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/DeepSeek.tsx @@ -0,0 +1,17 @@ +import { FC, SVGProps } from "react"; + +export const DeepSeekIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Gemini.tsx b/refact-agent/gui/src/features/Providers/icons/Gemini.tsx new file mode 100644 index 000000000..b2a8e7a44 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Gemini.tsx @@ -0,0 +1,32 @@ +import { FC, SVGProps } from "react"; + +export const GeminiIcon: FC> = (props) => { + return ( + + + + + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Groq.tsx b/refact-agent/gui/src/features/Providers/icons/Groq.tsx new file mode 100644 index 000000000..73a7a2f23 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Groq.tsx @@ -0,0 +1,16 @@ +import { FC, SVGProps } from "react"; + +export const GroqIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/LMStudio.tsx b/refact-agent/gui/src/features/Providers/icons/LMStudio.tsx new file mode 100644 index 000000000..f9f18c573 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/LMStudio.tsx @@ -0,0 +1,25 @@ +import { FC, SVGProps } from "react"; + +export const LMStudioIcon: FC> = (props) => { + return ( + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Ollama.tsx b/refact-agent/gui/src/features/Providers/icons/Ollama.tsx new file mode 100644 index 000000000..8dcd31b6b --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Ollama.tsx @@ -0,0 +1,20 @@ +import { FC, SVGProps } from "react"; + +export const OllamaIcon: FC> = (props) => { + return ( + + + + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/OpenAI.tsx b/refact-agent/gui/src/features/Providers/icons/OpenAI.tsx new file mode 100644 index 000000000..3c1c670e6 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/OpenAI.tsx @@ -0,0 +1,17 @@ +import { FC, SVGProps } from "react"; + +export const OpenAIIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/OpenRouter.tsx b/refact-agent/gui/src/features/Providers/icons/OpenRouter.tsx new file mode 100644 index 000000000..12af65160 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/OpenRouter.tsx @@ -0,0 +1,28 @@ +import { FC, SVGProps } from "react"; + +export const OpenRouterIcon: FC> = (props) => { + return ( + + + + + + + + + ); +}; diff 
--git a/refact-agent/gui/src/features/Providers/icons/Refact.tsx b/refact-agent/gui/src/features/Providers/icons/Refact.tsx new file mode 100644 index 000000000..c2927fce4 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Refact.tsx @@ -0,0 +1,24 @@ +import { FC, SVGProps } from "react"; + +export const RefactIcon: FC> = (props) => { + return ( + + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/Xai.tsx b/refact-agent/gui/src/features/Providers/icons/Xai.tsx new file mode 100644 index 000000000..9340fd6e5 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/Xai.tsx @@ -0,0 +1,16 @@ +import { FC, SVGProps } from "react"; + +export const XaiIcon: FC> = (props) => { + return ( + + + + ); +}; diff --git a/refact-agent/gui/src/features/Providers/icons/iconsMap.tsx b/refact-agent/gui/src/features/Providers/icons/iconsMap.tsx new file mode 100644 index 000000000..922703e57 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/icons/iconsMap.tsx @@ -0,0 +1,26 @@ +import { AnthropicIcon } from "./Anthropic"; +import { CustomIcon } from "./Custom"; +import { DeepSeekIcon } from "./DeepSeek"; +import { GeminiIcon } from "./Gemini"; +import { GroqIcon } from "./Groq"; +import { LMStudioIcon } from "./LMStudio"; +import { OllamaIcon } from "./Ollama"; +import { OpenAIIcon } from "./OpenAI"; +import { OpenRouterIcon } from "./OpenRouter"; +import { RefactIcon } from "./Refact"; +import { XaiIcon } from "./Xai"; + +export const iconsMap: Record = { + refact: , + refact_self_hosted: , + openai: , + anthropic: , + google_gemini: , + openrouter: , + deepseek: , + groq: , + ollama: , + lmstudio: , + xai: , + custom: , +}; diff --git a/refact-agent/gui/src/features/Providers/index.ts b/refact-agent/gui/src/features/Providers/index.ts new file mode 100644 index 000000000..68b19e53b --- /dev/null +++ b/refact-agent/gui/src/features/Providers/index.ts @@ -0,0 +1 @@ +export { Providers } from "./Providers"; diff --git a/refact-agent/gui/src/features/Providers/useUpdateProvider.ts b/refact-agent/gui/src/features/Providers/useUpdateProvider.ts new file mode 100644 index 000000000..52d42b141 --- /dev/null +++ b/refact-agent/gui/src/features/Providers/useUpdateProvider.ts @@ -0,0 +1,80 @@ +import { useCallback } from "react"; + +import { providersApi } from "../../services/refact"; +import { useAppDispatch } from "../../hooks"; + +import { getProviderName } from "./getProviderName"; +import { setError } from "../../features/Errors/errorsSlice"; +import { useProviderUpdateContext } from "./ProviderUpdateContext"; + +import type { ProviderCardProps } from "./ProviderCard"; + +export const useUpdateProvider = ({ + provider, +}: { + provider: ProviderCardProps["provider"]; +}) => { + const dispatch = useAppDispatch(); + const { updatingProviders, setProviderUpdating } = useProviderUpdateContext(); + + const [getProviderData] = providersApi.useLazyGetProviderQuery(); + const [saveProviderData] = providersApi.useUpdateProviderMutation(); + + // Use the provider name as the key to track state + // then get updating state from context + const providerKey = provider.name; + const isUpdatingEnabledState = updatingProviders[providerKey] || false; + + const updateProviderEnabledState = useCallback(async () => { + setProviderUpdating(providerKey, true); + + const { data: providerData } = await getProviderData({ + providerName: provider.name, + }); + + if (!providerData) { + setProviderUpdating(providerKey, false); + return; + } + + const enabled = providerData.enabled; + 
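+ // Negate the flag as just fetched from the server, not any cached card
+ // state, so a stale UI value cannot flip the provider the wrong way.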
+ const response = await saveProviderData({ + ...providerData, + enabled: !enabled, + }); + + if (response.error) { + dispatch( + setError( + `Error occurred on updating ${getProviderName( + provider, + )} configuration. Check if your provider configuration is correct`, + ), + ); + setProviderUpdating(providerKey, false); + return; + } + + dispatch( + providersApi.util.invalidateTags([ + { type: "CONFIGURED_PROVIDERS", id: "LIST" }, + ]), + ); + setTimeout(() => { + setProviderUpdating(providerKey, false); + }, 500); + }, [ + dispatch, + getProviderData, + saveProviderData, + provider, + providerKey, + setProviderUpdating, + ]); + + return { + updateProviderEnabledState, + isUpdatingEnabledState, + }; +}; diff --git a/refact-agent/gui/src/hooks/useCanUseTools.ts b/refact-agent/gui/src/hooks/useCanUseTools.ts index 7891cc2fa..e781175bd 100644 --- a/refact-agent/gui/src/hooks/useCanUseTools.ts +++ b/refact-agent/gui/src/hooks/useCanUseTools.ts @@ -3,7 +3,7 @@ import { useAppSelector } from "./useAppSelector"; import { useGetToolsQuery } from "./useGetToolsQuery"; import { useGetCapsQuery } from "./useGetCapsQuery"; import { selectModel } from "../features/Chat/Thread/selectors"; -import { CodeChatModel } from "../services/refact/caps"; +import { CodeChatModel } from "../services/refact/models"; export const useCanUseTools = () => { const capsRequest = useGetCapsQuery(); @@ -19,10 +19,10 @@ export const useCanUseTools = () => { if (!capsRequest.data) return false; if (!toolsRequest.data) return false; if (toolsRequest.data.length === 0) return false; - const modelName = chatModel || capsRequest.data.code_chat_default_model; + const modelName = chatModel || capsRequest.data.chat_default_model; - if (!(modelName in capsRequest.data.code_chat_models)) return false; - const model: CodeChatModel = capsRequest.data.code_chat_models[modelName]; + if (!(modelName in capsRequest.data.chat_models)) return false; + const model: CodeChatModel = capsRequest.data.chat_models[modelName]; if ("supports_tools" in model && model.supports_tools) return true; return false; }, [capsRequest.data, toolsRequest.data, chatModel]); diff --git a/refact-agent/gui/src/hooks/useCapsForToolUse.ts b/refact-agent/gui/src/hooks/useCapsForToolUse.ts index e97d2294d..0a6046f9a 100644 --- a/refact-agent/gui/src/hooks/useCapsForToolUse.ts +++ b/refact-agent/gui/src/hooks/useCapsForToolUse.ts @@ -30,7 +30,7 @@ export function useCapsForToolUse() { const toolUse = useAppSelector(selectThreadToolUse); const dispatch = useAppDispatch(); - const defaultCap = caps.data?.code_chat_default_model ?? ""; + const defaultCap = caps.data?.chat_default_model ?? ""; const selectedModel = useAppSelector(getSelectedChatModel); @@ -38,40 +38,40 @@ export function useCapsForToolUse() { const setCapModel = useCallback( (value: string) => { - const model = caps.data?.code_chat_default_model === value ? "" : value; + const model = caps.data?.chat_default_model === value ? "" : value; const action = setChatModel(model); dispatch(action); const tokens = - caps.data?.code_chat_models[value]?.n_ctx ?? DEFAULT_MAX_NEW_TOKENS; + caps.data?.chat_models[value]?.n_ctx ?? 
DEFAULT_MAX_NEW_TOKENS; dispatch(setMaxNewTokens(tokens)); }, - [caps.data?.code_chat_default_model, caps.data?.code_chat_models, dispatch], + [caps.data?.chat_default_model, caps.data?.chat_models, dispatch], ); const isMultimodalitySupportedForCurrentModel = useMemo(() => { - const models = caps.data?.code_chat_models; + const models = caps.data?.chat_models; const item = models?.[currentModel]; if (!item) return false; if (!item.supports_multimodality) return false; return true; - }, [caps.data?.code_chat_models, currentModel]); + }, [caps.data?.chat_models, currentModel]); const modelsSupportingTools = useMemo(() => { - const models = caps.data?.code_chat_models ?? {}; + const models = caps.data?.chat_models ?? {}; return Object.entries(models) .filter(([_, value]) => value.supports_tools) .map(([key]) => key); - }, [caps.data?.code_chat_models]); + }, [caps.data?.chat_models]); const modelsSupportingAgent = useMemo(() => { - const models = caps.data?.code_chat_models ?? {}; + const models = caps.data?.chat_models ?? {}; return Object.entries(models) .filter(([_, value]) => value.supports_agent) .map(([key]) => key); - }, [caps.data?.code_chat_models]); + }, [caps.data?.chat_models]); const usableModels = useMemo(() => { - const models = caps.data?.code_chat_models ?? {}; + const models = caps.data?.chat_models ?? {}; const items = Object.entries(models).reduce( (acc, [key, value]) => { if (toolUse === "explore" && value.supports_tools) { @@ -84,7 +84,7 @@ export function useCapsForToolUse() { [], ); return items; - }, [caps.data?.code_chat_models, toolUse]); + }, [caps.data?.chat_models, toolUse]); const usableModelsForPlan = useMemo(() => { // TODO: keep filtering logic for the future BYOK + Cloud (to show different providers) diff --git a/refact-agent/gui/src/hooks/useEventBusForIDE.ts b/refact-agent/gui/src/hooks/useEventBusForIDE.ts index a45290a9d..40f7ea170 100644 --- a/refact-agent/gui/src/hooks/useEventBusForIDE.ts +++ b/refact-agent/gui/src/hooks/useEventBusForIDE.ts @@ -47,6 +47,10 @@ export const ideEscapeKeyPressed = createAction("ide/escapeKeyPressed"); export const ideIsChatStreaming = createAction("ide/isChatStreaming"); export const ideIsChatReady = createAction("ide/isChatReady"); +export const ideSetCodeCompletionModel = createAction( + "ide/setCodeCompletionModel", +); + export const ideForceReloadFileByPath = createAction( "ide/forceReloadFileByPath", ); @@ -202,10 +206,17 @@ export const useEventsBusForIDE = () => { [postMessage], ); + const setCodeCompletionModel = useCallback( + (model: string) => { + const action = ideSetCodeCompletionModel(model); + postMessage(action); + }, + [postMessage], + ); + const [getCustomizationPath] = pathApi.useLazyCustomizationPathQuery(); const [getIntegrationsPath] = pathApi.useLazyIntegrationsPathQuery(); const [getPrivacyPath] = pathApi.useLazyPrivacyPathQuery(); - const [getBringYourOwnKeyPath] = pathApi.useLazyBringYourOwnKeyPathQuery(); // Creating a generic function to trigger different queries from RTK Query (to avoid duplicative code) const openFileFromPathQuery = useCallback( @@ -242,9 +253,6 @@ export const useEventsBusForIDE = () => { const openPrivacyFile = () => openFileFromPathQuery(getPrivacyPath); const openIntegrationsFile = () => openFileFromPathQuery(getIntegrationsPath); - const openBringYourOwnKeyFile = () => - openFileFromPathQuery(getBringYourOwnKeyPath); - const sendToolCallToIde = useCallback( (toolCall: TextDocToolCall, edit: ToolEditResult, chatId: string) => { const action = ideToolCall({ toolCall, 
edit, chatId }); @@ -264,7 +272,6 @@ export const useEventsBusForIDE = () => { queryPathThenOpenFile, openCustomizationFile, openPrivacyFile, - openBringYourOwnKeyFile, openIntegrationsFile, stopFileAnimation, startFileAnimation, @@ -274,5 +281,6 @@ export const useEventsBusForIDE = () => { setIsChatReady, setForceReloadFileByPath, sendToolCallToIde, + setCodeCompletionModel, }; }; diff --git a/refact-agent/gui/src/hooks/useEventBusForWeb.ts b/refact-agent/gui/src/hooks/useEventBusForWeb.ts index 1e3654fc4..6ac86129c 100644 --- a/refact-agent/gui/src/hooks/useEventBusForWeb.ts +++ b/refact-agent/gui/src/hooks/useEventBusForWeb.ts @@ -36,7 +36,7 @@ export function useEventBusForWeb() { } else if (host.type === "self") { setAddressURL(host.endpointAddress); setApiKey("any-will-work-for-local-server"); - } else if (host.type === "enterprise") { + } else { setAddressURL(host.endpointAddress); setApiKey(host.apiKey); } diff --git a/refact-agent/gui/src/hooks/useLinksFromLsp.ts b/refact-agent/gui/src/hooks/useLinksFromLsp.ts index 8cf0238a1..6715c2e8c 100644 --- a/refact-agent/gui/src/hooks/useLinksFromLsp.ts +++ b/refact-agent/gui/src/hooks/useLinksFromLsp.ts @@ -45,8 +45,7 @@ export function useGetLinksFromLsp() { // TODO: add the model const caps = useGetCapsQuery(); - const model = - useAppSelector(selectModel) || caps.data?.code_chat_default_model; + const model = useAppSelector(selectModel) || caps.data?.chat_default_model; const unCalledTools = React.useMemo(() => { if (messages.length === 0) return false; diff --git a/refact-agent/gui/src/hooks/useModelsQuery.ts b/refact-agent/gui/src/hooks/useModelsQuery.ts new file mode 100644 index 000000000..ff3bd73e4 --- /dev/null +++ b/refact-agent/gui/src/hooks/useModelsQuery.ts @@ -0,0 +1,38 @@ +import { modelsApi } from "../services/refact"; + +import type { GetModelArgs, GetModelDefaultsArgs } from "../services/refact"; + +export function useGetModelsByProviderNameQuery({ + providerName, +}: { + providerName: string; +}) { + return modelsApi.useGetModelsQuery({ providerName }); +} + +export function useGetModelConfiguration(args: GetModelArgs) { + return modelsApi.useGetModelQuery(args, { skip: !args.modelName }); +} + +export function useGetModelDefaults(args: GetModelDefaultsArgs) { + return modelsApi.useGetModelDefaultsQuery(args, { skip: !args.providerName }); +} + +export function useGetCompletionModelFamiliesQuery() { + return modelsApi.useGetCompletionModelFamiliesQuery(undefined); +} + +export function useGetLazyModelConfiguration() { + const [mutationTrigger] = modelsApi.useLazyGetModelQuery(); + return mutationTrigger; +} + +export function useUpdateModelMutation() { + const [mutationTrigger] = modelsApi.useUpdateModelMutation(); + return mutationTrigger; +} + +export function useDeleteModelMutation() { + const [mutationTrigger] = modelsApi.useDeleteModelMutation(); + return mutationTrigger; +} diff --git a/refact-agent/gui/src/hooks/useProvidersQuery.ts b/refact-agent/gui/src/hooks/useProvidersQuery.ts new file mode 100644 index 000000000..c1081843d --- /dev/null +++ b/refact-agent/gui/src/hooks/useProvidersQuery.ts @@ -0,0 +1,27 @@ +import { providersApi } from "../services/refact"; + +export function useGetConfiguredProvidersQuery() { + return providersApi.useGetConfiguredProvidersQuery(undefined); +} + +export function useGetProviderTemplatesQuery() { + return providersApi.useGetProviderTemplatesQuery(undefined); +} + +export function useGetProviderQuery({ + providerName, +}: { + providerName: string; +}) { + return 
providersApi.useGetProviderQuery({ providerName }); +} + +export function useUpdateProviderMutation() { + const [mutationTrigger] = providersApi.useUpdateProviderMutation(); + return mutationTrigger; +} + +export function useDeleteProviderMutation() { + const [mutationTrigger] = providersApi.useDeleteProviderMutation(); + return mutationTrigger; +} diff --git a/refact-agent/gui/src/hooks/useThinking.ts b/refact-agent/gui/src/hooks/useThinking.ts index 354133f6e..6fdd05892 100644 --- a/refact-agent/gui/src/hooks/useThinking.ts +++ b/refact-agent/gui/src/hooks/useThinking.ts @@ -24,10 +24,10 @@ export function useThinking() { const { data: userData } = useGetUser(); const supportsBoostReasoning = useMemo(() => { - const models = caps.data?.code_chat_models; + const models = caps.data?.chat_models; const item = models?.[caps.currentModel]; return item?.supports_boost_reasoning ?? false; - }, [caps.data?.code_chat_models, caps.currentModel]); + }, [caps.data?.chat_models, caps.currentModel]); const shouldBeTeasing = useMemo( () => userData?.inference === "FREE", diff --git a/refact-agent/gui/src/services/refact/caps.ts b/refact-agent/gui/src/services/refact/caps.ts index 4abcfb8a6..e97659223 100644 --- a/refact-agent/gui/src/services/refact/caps.ts +++ b/refact-agent/gui/src/services/refact/caps.ts @@ -1,6 +1,7 @@ import { RootState } from "../../app/store"; import { CAPS_URL } from "./consts"; import { createApi, fetchBaseQuery } from "@reduxjs/toolkit/query/react"; +import { CodeChatModel, CodeCompletionModel, EmbeddingModel } from "./models"; export const capsApi = createApi({ reducerPath: "caps", @@ -48,59 +49,38 @@ export const capsApi = createApi({ export const capsEndpoints = capsApi.endpoints; -export type CodeChatModel = { - default_scratchpad: string; - n_ctx: number; - similar_models: string[]; - supports_tools?: boolean | null | undefined; - supports_scratchpads: Record< - string, - { - default_system_message?: string; - } - >; - supports_multimodality?: boolean; - supports_clicks?: boolean; - // TODO: could be defined - supports_agent?: boolean; - supports_boost_reasoning?: boolean; -}; - -export type CodeCompletionModel = { - default_scratchpad: string; - n_ctx: number; - similar_models: string[]; - supports_scratchpads: Record<string, Record<string, unknown>>; - supports_tools?: boolean; - supports_multimodality?: boolean; - supports_clicks?: boolean; -}; - export type CapsResponse = { caps_version: number; cloud_name: string; - code_chat_default_model: string; + + chat_default_model: string; + chat_models: Record<string, CodeChatModel>; code_chat_default_system_prompt: string; - code_chat_models: Record<string, CodeChatModel>; - code_completion_default_model: string; + completion_models: Record<string, CodeCompletionModel>; + completion_default_model: string; code_completion_n_ctx: number; + embedding_model?: EmbeddingModel; + chat_thinking_model: string; + chat_light_model: string; + endpoint_chat_passthrough: string; endpoint_style: string; endpoint_template: string; running_models: string[]; telemetry_basic_dest: string; tokenizer_path_template: string; + telemetry_basic_retrieve_my_own: string; tokenizer_rewrite_path: Record<string, string>; support_metadata: boolean; + customization: string; }; export function isCapsResponse(json: unknown): json is CapsResponse { if (!json) return false; if (typeof json !== "object") return false; - if (!("code_chat_default_model" in json)) return false; - if (typeof json.code_chat_default_model !== "string") return false; - if (!("code_chat_models" in json)) return false; + if (!("chat_default_model" in json)) return false; +
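// NB: spot-check only; per-model shapes have their own guards in services/refact/models.ts +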
if (typeof json.chat_default_model !== "string") return false; + if (!("chat_models" in json)) return false; return true; } diff --git a/refact-agent/gui/src/services/refact/commands.ts b/refact-agent/gui/src/services/refact/commands.ts index b95944bba..ddb9aa915 100644 --- a/refact-agent/gui/src/services/refact/commands.ts +++ b/refact-agent/gui/src/services/refact/commands.ts @@ -160,6 +160,10 @@ export function isCommandCompletionResponse( export type DetailMessage = { detail: string; }; + +export type DetailMessageWithErrorType = DetailMessage & { + errorType: "CHAT" | "GLOBAL"; +}; export function isDetailMessage(json: unknown): json is DetailMessage { if (!json) return false; if (typeof json !== "object") return false; @@ -167,6 +171,16 @@ export function isDetailMessage(json: unknown): json is DetailMessage { return true; } +export function isDetailMessageWithErrorType( + json: unknown, +): json is DetailMessageWithErrorType { + if (!json) return false; + if (typeof json !== "object") return false; + if (!("detail" in json)) return false; + if (!("errorType" in json)) return false; + return true; +} + export type CommandPreviewContent = { content: string; role: "context_file" | "plain_text"; diff --git a/refact-agent/gui/src/services/refact/consts.ts b/refact-agent/gui/src/services/refact/consts.ts index 2c1ce6a49..303aa148e 100644 --- a/refact-agent/gui/src/services/refact/consts.ts +++ b/refact-agent/gui/src/services/refact/consts.ts @@ -43,3 +43,13 @@ export const KNOWLEDGE_UPDATE_URL = "/v1/mem-upd"; export const KNOWLEDGE_CREATE_URL = "/v1/trajectory-save"; export const COMPRESS_MESSAGES_URL = "/v1/trajectory-compress"; + +// Providers & Models +export const CONFIGURED_PROVIDERS_URL = "/v1/providers"; +export const PROVIDER_TEMPLATES_URL = "/v1/provider-templates"; +export const PROVIDER_URL = "/v1/provider"; + +export const MODELS_URL = "/v1/models"; +export const MODEL_URL = "/v1/model"; +export const MODEL_DEFAULTS_URL = "/v1/model-defaults"; +export const COMPLETION_MODEL_FAMILIES_URL = "/v1/completion-model-families"; diff --git a/refact-agent/gui/src/services/refact/index.ts b/refact-agent/gui/src/services/refact/index.ts index 6153995d8..92f3c4213 100644 --- a/refact-agent/gui/src/services/refact/index.ts +++ b/refact-agent/gui/src/services/refact/index.ts @@ -1,4 +1,6 @@ export * from "./caps"; +export * from "./providers"; +export * from "./models"; export * from "./chat"; export * from "./commands"; export * from "./fim"; diff --git a/refact-agent/gui/src/services/refact/models.ts b/refact-agent/gui/src/services/refact/models.ts new file mode 100644 index 000000000..91262dd3a --- /dev/null +++ b/refact-agent/gui/src/services/refact/models.ts @@ -0,0 +1,431 @@ +import { RootState } from "../../app/store"; +import { + COMPLETION_MODEL_FAMILIES_URL, + MODEL_DEFAULTS_URL, + MODEL_URL, + MODELS_URL, +} from "./consts"; +import { createApi, fetchBaseQuery } from "@reduxjs/toolkit/query/react"; +import { hasProperty } from "../../utils"; +import { isDetailMessage } from "./commands"; + +export const modelsApi = createApi({ + reducerPath: "models", + tagTypes: ["MODELS", "MODEL"], + baseQuery: fetchBaseQuery({ + prepareHeaders: (headers, { getState }) => { + const token = (getState() as RootState).config.apiKey; + if (token) { + headers.set("Authorization", `Bearer ${token}`); + } + return headers; + }, + }), + endpoints: (builder) => ({ + getModels: builder.query({ + providesTags: ["MODELS"], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = 
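/* the LSP port comes from GUI config; all model queries target the local server */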
api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODELS_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + "provider-name": args.providerName, + }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isModelsResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/models", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + getModel: builder.query({ + providesTags: ["MODEL"], + queryFn: async (args, api, extraOptions, baseQuery) => { + const { modelName, modelType, providerName } = args; + + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + provider: providerName, + model: modelName, + type: modelType, + }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isModel(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/model", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + getModelDefaults: builder.query({ + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_DEFAULTS_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + provider: args.providerName, + type: args.modelType, + }, + }); + + if (result.error) { + return { error: result.error }; + } + + if (!isModel(result.data)) { + return { + error: { + error: "Invalid response from /v1/model-defaults", + status: "CUSTOM_ERROR", + data: result.data, + }, + }; + } + + return { data: result.data }; + }, + }), + getCompletionModelFamilies: builder.query< + CompletionModelFamiliesResponse, + undefined + >({ + queryFn: async (_args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${COMPLETION_MODEL_FAMILIES_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + }); + + if (result.error) { + return { error: result.error }; + } + + if (!isCompletionModelFamiliesResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/completion-model-families", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + updateModel: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "MODEL", id: args.model.name }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "POST", + url, + body: { ...args }, + credentials: "same-origin", + redirect: "follow", + }); + + if (result.error) { + return { error: result.error }; + } + + // TODO: this doesn't really work: RTK Query reports FETCH_ERROR when the request fails, dropping the actual response from the LSP :/
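+ // (fetchBaseQuery puts non-2xx response bodies in result.error.data, so result.data below is only populated for 2xx replies)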
+ if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/model", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + deleteModel: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "MODEL", id: args.model }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${MODEL_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "DELETE", + url, + params: { ...args }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/model", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + }), + refetchOnMountOrArgChange: true, +}); + +export type SimplifiedModel = { + name: string; + enabled: boolean; + removable: boolean; + user_configured: boolean; +}; + +export type ModelsResponse = { + completion_models: SimplifiedModel[]; + chat_models: SimplifiedModel[]; + embedding_model: SimplifiedModel; +}; + +export type ModelType = "embedding" | "completion" | "chat"; + +export type GetModelArgs = { + modelName: string; + providerName: string; + modelType: ModelType; +}; + +export type GetModelDefaultsArgs = Omit<GetModelArgs, "modelName">; + +export type GetModelsArgs = { + providerName: string; +}; + +export type UpdateModelRequestBody = { + provider: string; + model: Model; + type: ModelType; +}; + +export type DeleteModelRequestBody = Omit<UpdateModelRequestBody, "model"> & { + model: string; +}; + +export type SupportsReasoningStyle = "openai" | "anthropic" | "deepseek" | null; + +export type CodeChatModel = { + n_ctx: number; + name: string; + tokenizer: string; + id: string; + + supports_tools: boolean; + supports_multimodality: boolean; + supports_clicks: boolean; + supports_agent: boolean; + supports_reasoning: SupportsReasoningStyle; + supports_boost_reasoning: boolean; + default_temperature: number | null; + + enabled: boolean; + + type: "chat"; +}; + +export type CodeCompletionModel = { + n_ctx: number; + name: string; + model_family: string | null; + type: "completion"; + enabled: boolean; +}; + +export type EmbeddingModel = { + n_ctx: number; + name: string; + id: string; + tokenizer: string; + + embedding_size: number; + rejection_threshold: number; + embedding_batch: number; + + enabled: boolean; + + type: "embedding"; +}; + +export function isModelsResponse(data: unknown): data is ModelsResponse { + // Check if data is an object + if (typeof data !== "object" || data === null) return false; + + if ( + !hasProperty(data, "completion_models") || + !hasProperty(data, "chat_models") || + !hasProperty(data, "embedding_model") + ) + return false; + + return true; +} + +export type Model = CodeChatModel | CodeCompletionModel | EmbeddingModel; + +export function isCodeChatModel(data: unknown): data is CodeChatModel { + if (!data || typeof data !== "object") return false; + + if (!("n_ctx" in data) || typeof data.n_ctx !== "number") return false; + if (!("name" in data) || typeof data.name !== "string") return false; + if (!("tokenizer" in data) || typeof data.tokenizer !== "string") + return false; + + if (!("supports_tools" in data) || typeof
data.supports_tools !== "boolean") + return false; + if ( + !("supports_multimodality" in data) || + typeof data.supports_multimodality !== "boolean" + ) + return false; + if (!("supports_clicks" in data) || typeof data.supports_clicks !== "boolean") + return false; + if (!("supports_agent" in data) || typeof data.supports_agent !== "boolean") + return false; + + if (!("supports_reasoning" in data)) return false; + + if ( + !("supports_boost_reasoning" in data) || + typeof data.supports_boost_reasoning !== "boolean" + ) + return false; + + if (!("default_temperature" in data)) return false; + if ( + data.default_temperature !== null && + typeof data.default_temperature !== "number" + ) + return false; + + if (!("enabled" in data) || typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isCodeCompletionModel( + data: unknown, +): data is CodeCompletionModel { + if (!data || typeof data !== "object") return false; + + if (!("n_ctx" in data) || typeof data.n_ctx !== "number") return false; + if (!("name" in data) || typeof data.name !== "string") return false; + if ( + "model_family" in data && + typeof data.model_family !== "string" && + data.model_family !== null + ) + return false; + if (!("enabled" in data) || typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isEmbeddingModel(data: unknown): data is EmbeddingModel { + if (!data || typeof data !== "object") return false; + + if (!("n_ctx" in data) || typeof data.n_ctx !== "number") return false; + if (!("name" in data) || typeof data.name !== "string") return false; + if (!("tokenizer" in data) || typeof data.tokenizer !== "string") + return false; + + if (!("embedding_size" in data) || typeof data.embedding_size !== "number") + return false; + if ( + !("rejection_threshold" in data) || + typeof data.rejection_threshold !== "number" + ) + return false; + if (!("embedding_batch" in data) || typeof data.embedding_batch !== "number") + return false; + + if (!("enabled" in data) || typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isModel(data: unknown): data is Model { + return ( + isCodeChatModel(data) || + isCodeCompletionModel(data) || + isEmbeddingModel(data) + ); +} + +export type CompletionModelFamiliesResponse = { model_families: string[] }; + +export function isCompletionModelFamiliesResponse( + data: unknown, +): data is CompletionModelFamiliesResponse { + if (!data || typeof data !== "object") return false; + return "model_families" in data && Array.isArray(data.model_families); +} diff --git a/refact-agent/gui/src/services/refact/path.ts b/refact-agent/gui/src/services/refact/path.ts index 3a6b3ed96..801fcf183 100644 --- a/refact-agent/gui/src/services/refact/path.ts +++ b/refact-agent/gui/src/services/refact/path.ts @@ -132,17 +132,6 @@ export const pathApi = createApi({ ); }, }), - bringYourOwnKeyPath: builder.query({ - queryFn: async (_arg, api, extraOptions, baseQuery) => { - return await fetchPath( - api, - baseQuery, - extraOptions, - CONFIG_PATH_URL, - "/bring-your-own-key.yaml", - ); - }, - }), integrationsPath: builder.query({ queryFn: async (_arg, api, extraOptions, baseQuery) => { return await fetchPath( diff --git a/refact-agent/gui/src/services/refact/providers.ts b/refact-agent/gui/src/services/refact/providers.ts new file mode 100644 index 000000000..eb9592072 --- /dev/null +++ b/refact-agent/gui/src/services/refact/providers.ts @@ -0,0 +1,362 @@ +import { RootState } from "../../app/store"; +import { 
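/* hasProperty narrows an unknown object to one carrying the given key, avoiding casts */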
hasProperty } from "../../utils"; +import { isDetailMessage } from "./commands"; +import { + CONFIGURED_PROVIDERS_URL, + PROVIDER_TEMPLATES_URL, + PROVIDER_URL, +} from "./consts"; +import { createApi, fetchBaseQuery } from "@reduxjs/toolkit/query/react"; + +export const providersApi = createApi({ + reducerPath: "providers", + tagTypes: [ + "PROVIDERS", + "TEMPLATE_PROVIDERS", + "CONFIGURED_PROVIDERS", + "PROVIDER", + ], + baseQuery: fetchBaseQuery({ + prepareHeaders: (headers, { getState }) => { + const token = (getState() as RootState).config.apiKey; + if (token) { + headers.set("Authorization", `Bearer ${token}`); + } + return headers; + }, + }), + endpoints: (builder) => ({ + getConfiguredProviders: builder.query< + ConfiguredProvidersResponse, + undefined + >({ + queryFn: async (_args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${CONFIGURED_PROVIDERS_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isConfiguredProvidersResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/providers", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + providesTags: [{ type: "CONFIGURED_PROVIDERS", id: "LIST" }], + }), + getProviderTemplates: builder.query({ + providesTags: ["TEMPLATE_PROVIDERS"], + queryFn: async (_args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_TEMPLATES_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (!isProviderTemplatesResponse(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider-templates", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + getProvider: builder.query({ + providesTags: ["PROVIDER"], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "GET", + url, + params: { + "provider-name": args.providerName, + }, + credentials: "same-origin", + redirect: "follow", + }); + + if (result.error) { + return { error: result.error }; + } + + if (!isProvider(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + updateProvider: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "PROVIDER", id: args.name }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "POST", + url, + body: { ...args }, + credentials: "same-origin", + redirect: "follow", 
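/* the LSP may answer 200 with a DetailMessage body; that is treated as an error below */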
+ }); + if (result.error) { + return { error: result.error }; + } + if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + deleteProvider: builder.mutation({ + invalidatesTags: (_result, _error, args) => [ + { type: "PROVIDER", id: args }, + ], + queryFn: async (args, api, extraOptions, baseQuery) => { + const state = api.getState() as RootState; + const port = state.config.lspPort as unknown as number; + const url = `http://127.0.0.1:${port}${PROVIDER_URL}`; + + const result = await baseQuery({ + ...extraOptions, + method: "DELETE", + url, + params: { + "provider-name": args, + }, + credentials: "same-origin", + redirect: "follow", + }); + if (result.error) { + return { error: result.error }; + } + if (isDetailMessage(result.data)) { + return { + meta: result.meta, + error: { + error: "Invalid response from /v1/provider", + data: result.data, + status: "CUSTOM_ERROR", + }, + }; + } + + return { data: result.data }; + }, + }), + }), + refetchOnMountOrArgChange: true, +}); + +export type Provider = { + name: string; + endpoint_style: "openai" | "hf"; + chat_endpoint: string; + completion_endpoint: string; + embedding_endpoint: string; + api_key: string; + + chat_default_model: string; + chat_thinking_model: string; + chat_light_model: string; + + enabled: boolean; + readonly: boolean; + supports_completion?: boolean; +}; + +export type SimplifiedProvider< + T extends keyof Provider | undefined = undefined, +> = [T] extends [undefined] + ? Partial<Provider> + : Required<Pick<Provider, T>>; + +export type ErrorLogInstance = { + path: string; + error_line: number; + error_msg: string; +}; + +export type ConfiguredProvidersResponse = { + providers: SimplifiedProvider< + "name" | "enabled" | "readonly" | "supports_completion" + >[]; + error_log: ErrorLogInstance[]; +}; + +export type ProviderTemplatesResponse = { + provider_templates: SimplifiedProvider<"name">[]; +}; + +export const providersEndpoints = providersApi.endpoints; + +export function isProvider(data: unknown): data is Provider { + if (typeof data !== "object" || data === null) return false; + + if ( + !hasProperty(data, "name") || + !hasProperty(data, "endpoint_style") || + !hasProperty(data, "chat_endpoint") || + !hasProperty(data, "completion_endpoint") || + !hasProperty(data, "embedding_endpoint") || + !hasProperty(data, "api_key") || + !hasProperty(data, "chat_default_model") || + !hasProperty(data, "chat_thinking_model") || + !hasProperty(data, "chat_light_model") || + !hasProperty(data, "enabled") + ) + return false; + + if (typeof data.name !== "string") return false; + if (data.endpoint_style !== "openai" && data.endpoint_style !== "hf") + return false; + if (typeof data.chat_endpoint !== "string") return false; + if (typeof data.completion_endpoint !== "string") return false; + if (typeof data.embedding_endpoint !== "string") return false; + if (typeof data.api_key !== "string") return false; + if (typeof data.chat_default_model !== "string") return false; + if (typeof data.chat_thinking_model !== "string") return false; + if (typeof data.chat_light_model !== "string") return false; + if (typeof data.enabled !== "boolean") return false; + + return true; +} + +export function isConfiguredProvidersResponse( + data: unknown, +): data is ConfiguredProvidersResponse { + // Check if data is an object + if (typeof data !== "object" || data === null) return false; + + if
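/* entries in error_log point at the provider file (path, error_line, error_msg) that failed to load */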
(!hasProperty(data, "providers") || !hasProperty(data, "error_log")) + return false; + + if (!Array.isArray(data.providers)) return false; + + if (!Array.isArray(data.error_log)) return false; + + for (const provider of data.providers) { + if (!isSimplifiedProvider(provider)) return false; + } + + for (const errorLog of data.error_log) { + if (!isErrorLogInstance(errorLog)) return false; + } + + return true; +} + +export function isProviderTemplatesResponse( + data: unknown, +): data is ProviderTemplatesResponse { + if (typeof data !== "object" || data === null) return false; + + if (!hasProperty(data, "provider_templates")) return false; + + if (!Array.isArray(data.provider_templates)) return false; + + for (const template of data.provider_templates) { + if (!isSimplifiedProviderWithName(template)) return false; + } + + return true; +} + +function isSimplifiedProviderWithName( + template: unknown, +): template is SimplifiedProvider<"name"> { + if (typeof template !== "object" || template === null) return false; + + if (!hasProperty(template, "name")) return false; + + return typeof template.name === "string"; +} + +function isSimplifiedProvider( + provider: unknown, +): provider is SimplifiedProvider<"name" | "enabled"> { + if (typeof provider !== "object" || provider === null) return false; + + if (!hasProperty(provider, "name") || !hasProperty(provider, "enabled")) + return false; + + if ( + hasProperty(provider, "readonly") && + typeof provider.readonly !== "boolean" + ) + return false; + + return ( + typeof provider.name === "string" && typeof provider.enabled === "boolean" + ); +} + +function isErrorLogInstance(errorLog: unknown): errorLog is ErrorLogInstance { + if (typeof errorLog !== "object" || errorLog === null) return false; + + if ( + !hasProperty(errorLog, "path") || + !hasProperty(errorLog, "error_line") || + !hasProperty(errorLog, "error_msg") + ) + return false; + + return ( + typeof errorLog.path === "string" && + typeof errorLog.error_line === "number" && + typeof errorLog.error_msg === "string" + ); +} diff --git a/refact-agent/gui/src/utils/hasProperty.ts b/refact-agent/gui/src/utils/hasProperty.ts new file mode 100644 index 000000000..1e205b4d8 --- /dev/null +++ b/refact-agent/gui/src/utils/hasProperty.ts @@ -0,0 +1,6 @@ +export function hasProperty<T extends PropertyKey>( + obj: object, + prop: T, +): obj is { [K in T]: unknown } { + return prop in obj; +} diff --git a/refact-agent/gui/src/utils/index.ts b/refact-agent/gui/src/utils/index.ts index e61b563f8..825c6c6ae 100644 --- a/refact-agent/gui/src/utils/index.ts +++ b/refact-agent/gui/src/utils/index.ts @@ -10,3 +10,4 @@ export * from "./partition"; export * from "./fencedBackticks"; export * from "./isAbsolutePath"; export * from "./isDetailMessage"; +export * from "./hasProperty"; diff --git a/refact-server/Dockerfile.base b/refact-server/Dockerfile.base index ee74375c6..62d73e0f7 100644 --- a/refact-server/Dockerfile.base +++ b/refact-server/Dockerfile.base @@ -34,6 +34,8 @@ RUN pip install ninja RUN pip install packaging==24.1 setuptools==70.0.0 setuptools-scm==8.1.0 ENV CMAKE_ARGS="-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=60;61;70;75;80;86;89;90+PTX" RUN pip install -v --no-build-isolation git+https://github.com/smallcloudai/vllm@refact_v0.7.3 +# Or, if the Refact-specific patches in the fork are not required, pull the vLLM GPU wheel directly from PyPI instead of building from source: +# RUN pip install --no-cache-dir vllm==0.7.3 # there is no prebuilt auto-gptq with torch 2.5.0 support ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"