@@ -2018,27 +2018,29 @@ def transform_genparams(genparams, api_format):
         #if auto mode, determine whether a tool is needed
         tools_string = json.dumps(tools_array, indent=0)
         should_use_tools = True
+        user_start = adapter_obj.get("user_start", "### Instruction:\n\n")
+        user_end = adapter_obj.get("user_end", "\n\n### Response:\n\n")
         if chosen_tool == "auto":
             temp_poll = {
-                "prompt": f"{messages_string}\n\nAvailable Tools:\n{tools_string}\n\nBased on the above, answer in one word only (yes or no): Should a tool be used?\n\nAnswer:\n",
+                "prompt": f"{user_start}User query:\n\n{messages_string}\n\nTool Code:\n{tools_string} Determine from the provided tool code if the user query would be best answered by a listed tool (One word: yes / no): {user_end}",
                 "max_length": 4,
-                "temperature": 0.2,
-                "top_k": 10,
+                "temperature": 0.1,
+                "top_k": 1,
                 "rep_pen": 1,
                 "ban_eos_token": False
             }
             temp_poll_result = generate(genparams=temp_poll)
-            if temp_poll_result and "no" in temp_poll_result['text'].lower():
+            if temp_poll_result and "yes" not in temp_poll_result['text'].lower():
                 should_use_tools = False
             if not args.quiet:
-                print(f"\nDeciding if we should use a tool: {temp_poll_result['text']} ({should_use_tools})")
+                print(f"\nRelevant tool is listed: {temp_poll_result['text']} ({should_use_tools})")
 
         if should_use_tools:
             messages_string += tools_string
             messages_string += tool_json_formatting_instruction
 
             # Set temperature low automatically if function calling
-            genparams["temperature"] = 0.2
+            genparams["temperature"] = 0.1
             genparams["using_openai_tools"] = True
 
             # Set grammar to llamacpp example grammar to force json response (see https://github.com/ggerganov/llama.cpp/blob/master/grammars/json_arr.gbnf)
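For anyone wanting to poke at the new auto-detection heuristic in isolation: below is a minimal standalone sketch of the yes/no poll this hunk implements. `fake_generate` is a stand-in for koboldcpp's real `generate()`, and the helper name `should_use_tools_poll` is invented for illustration; the prompt wrapping and sampling settings mirror the diff.

```python
import json

# Stand-in for koboldcpp's generate(); any callable returning {"text": ...} works here.
def fake_generate(genparams):
    return {"text": "yes"}

def should_use_tools_poll(messages_string, tools_array, adapter_obj, generate=fake_generate):
    # Wrap the yes/no question in the active instruct template, as the diff does,
    # so instruct-tuned models see a properly formed turn instead of a bare prompt.
    user_start = adapter_obj.get("user_start", "### Instruction:\n\n")
    user_end = adapter_obj.get("user_end", "\n\n### Response:\n\n")
    tools_string = json.dumps(tools_array, indent=0)
    temp_poll = {
        "prompt": f"{user_start}User query:\n\n{messages_string}\n\nTool Code:\n{tools_string} Determine from the provided tool code if the user query would be best answered by a listed tool (One word: yes / no): {user_end}",
        "max_length": 4,     # only one short word is needed back
        "temperature": 0.1,  # near-greedy: this is a classification, not creative text
        "top_k": 1,          # greedy pick removes sampling noise entirely
        "rep_pen": 1,
        "ban_eos_token": False
    }
    result = generate(genparams=temp_poll)
    # Opt-in check: anything that is not an explicit "yes" skips the tools.
    return bool(result) and "yes" in result["text"].lower()

print(should_use_tools_poll("What's the weather in Paris?", [{"type": "function"}], {}))
```

The switch from `top_k=10` to `top_k=1` makes the classification deterministic, and requiring an explicit "yes" (rather than bailing out only on an explicit "no") means ambiguous answers now default to a plain chat response instead of a tool call.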
@@ -2265,6 +2267,7 @@ def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat
         tool_calls = extract_json_from_string(recvtxt)
         if tool_calls and len(tool_calls) > 0:
             recvtxt = None
+            currfinishreason = "tool_calls"
         res = {"id": "chatcmpl-A1", "object": "chat.completion", "created": int(time.time()), "model": friendlymodelname,
             "usage": {"prompt_tokens": prompttokens, "completion_tokens": comptokens, "total_tokens": (prompttokens + comptokens)},
             "choices": [{"index": 0, "message": {"role": "assistant", "content": recvtxt, "tool_calls": tool_calls}, "finish_reason": currfinishreason, "logprobs": logprobsdict}]}