@@ -2018,27 +2018,29 @@ def transform_genparams(genparams, api_format):
         #if auto mode, determine whether a tool is needed
         tools_string = json.dumps(tools_array, indent=0)
         should_use_tools = True
+        user_start = adapter_obj.get("user_start", "### Instruction:\n\n")
+        user_end = adapter_obj.get("user_end", "\n\n### Response:\n\n")
         if chosen_tool == "auto":
             temp_poll = {
-                "prompt": f"{messages_string}\n\nAvailable Tools:\n{tools_string}\n\nBased on the above, answer in one word only (yes or no): Should a tool be used?\n\nAnswer:\n",
+                "prompt": f"{user_start}User query:\n\n{messages_string}\n\nTool Code:\n{tools_string} Determine from the provided tool code if the user query would be best answered by a listed tool (One word: yes / no): {user_end}",
                 "max_length": 4,
-                "temperature": 0.2,
-                "top_k": 10,
+                "temperature": 0.1,
+                "top_k": 1,
                 "rep_pen": 1,
                 "ban_eos_token": False
             }
             temp_poll_result = generate(genparams=temp_poll)
-            if temp_poll_result and "no" in temp_poll_result['text'].lower():
+            if temp_poll_result and "yes" not in temp_poll_result['text'].lower():
                 should_use_tools = False
             if not args.quiet:
-                print(f"\nDeciding if we should use a tool: {temp_poll_result['text']} ({should_use_tools})")
+                print(f"\nRelevant tool is listed: {temp_poll_result['text']} ({should_use_tools})")
 
         if should_use_tools:
             messages_string += tools_string
             messages_string += tool_json_formatting_instruction
 
             # Set temperature low automatically if function calling
-            genparams["temperature"] = 0.2
+            genparams["temperature"] = 0.1
             genparams["using_openai_tools"] = True
 
             # Set grammar to llamacpp example grammar to force json response (see https://github.com/ggerganov/llama.cpp/blob/master/grammars/json_arr.gbnf)
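For anyone wanting to poke at the new auto-detection heuristic in isolation: below is a minimal standalone sketch of the yes/no poll this hunk implements. `fake_generate` is a stand-in for koboldcpp's real `generate()`, and the helper name `should_use_tools_poll` is invented for illustration; the prompt wrapping and sampling settings mirror the diff.

```python
import json

# Stand-in for koboldcpp's generate(); any callable returning {"text": ...} works here.
def fake_generate(genparams):
    return {"text": "yes"}

def should_use_tools_poll(messages_string, tools_array, adapter_obj, generate=fake_generate):
    # Wrap the yes/no question in the active instruct template, as the diff does,
    # so instruct-tuned models see a properly formed turn instead of a bare prompt.
    user_start = adapter_obj.get("user_start", "### Instruction:\n\n")
    user_end = adapter_obj.get("user_end", "\n\n### Response:\n\n")
    tools_string = json.dumps(tools_array, indent=0)
    temp_poll = {
        "prompt": f"{user_start}User query:\n\n{messages_string}\n\nTool Code:\n{tools_string} Determine from the provided tool code if the user query would be best answered by a listed tool (One word: yes / no): {user_end}",
        "max_length": 4,     # only one short word is needed back
        "temperature": 0.1,  # near-greedy: this is a classification, not creative text
        "top_k": 1,          # greedy pick removes sampling noise entirely
        "rep_pen": 1,
        "ban_eos_token": False
    }
    result = generate(genparams=temp_poll)
    # Opt-in check: anything that is not an explicit "yes" skips the tools.
    return bool(result) and "yes" in result["text"].lower()

print(should_use_tools_poll("What's the weather in Paris?", [{"type": "function"}], {}))
```

The switch from `top_k=10` to `top_k=1` makes the classification deterministic, and requiring an explicit "yes" (rather than bailing out only on an explicit "no") means ambiguous answers now default to a plain chat response instead of a tool call.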
@@ -2265,6 +2267,7 @@ def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat
         tool_calls = extract_json_from_string(recvtxt)
         if tool_calls and len(tool_calls) > 0:
             recvtxt = None
+            currfinishreason = "tool_calls"
         res = {"id": "chatcmpl-A1", "object": "chat.completion", "created": int(time.time()), "model": friendlymodelname,
             "usage": {"prompt_tokens": prompttokens, "completion_tokens": comptokens, "total_tokens": (prompttokens + comptokens)},
             "choices": [{"index": 0, "message": {"role": "assistant", "content": recvtxt, "tool_calls": tool_calls}, "finish_reason": currfinishreason, "logprobs": logprobsdict}]}