Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions WebAgent/WebSailor/src/react_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10):
def count_tokens(self, messages, model="gpt-4o"):
try:
tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path)
except Exception as e:
except:
tokenizer = tiktoken.encoding_for_model(model)

full_message = [Message(**x) for x in messages]
Expand Down Expand Up @@ -159,4 +159,4 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
"prediction": prediction,
"termination": termination
}
return result
return result
6 changes: 3 additions & 3 deletions WebAgent/WebWalker/src/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar
if stage1:
self.momery.append(stage1+"\n")
if len(self.momery) > 1:
yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"}")]
yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")]
else:
yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"}")]
yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")]
stage2 = self.critic_information(query, self.momery)
if stage2:
response = f'Final Answer: {stage2}'
Expand Down Expand Up @@ -205,4 +205,4 @@ def _detect_tool(self, text: str) -> Tuple[bool, str, str, str]:
func_name = text[i + len(special_func_token):j].strip()
func_args = text[j + len(special_args_token):k].strip()
text = text[:i] # Return the response before tool call, i.e., `Thought`
return (func_name is not None), func_name, func_args, text
return (func_name is not None), func_name, func_args, text
14 changes: 6 additions & 8 deletions evaluation/evaluate_deepsearch_official.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from pydantic import BaseModel
from openai import OpenAI
import concurrent.futures
from typing import Literal
import litellm
import os
import argparse
Expand Down Expand Up @@ -189,7 +187,7 @@ def aggregate_statistics(round1_file, round2_file, round3_file):
round3_stats = single_round_statistics(round3_file)

keys = round1_stats.keys()
avg_stats = {}
avg_stats = {}
for key in keys:
if isinstance(round1_stats[key], dict):

Expand Down Expand Up @@ -224,7 +222,7 @@ def single_round_statistics(input_file):

try:
tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", ""))
except Exception as e:
except Exception:
tokenizer = tiktoken.encoding_for_model("gpt-4o")

for item in contents:
Expand Down Expand Up @@ -329,7 +327,7 @@ def calculate_enhanced_statistics(round_results, round_items):

try:
tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", ""))
except Exception as e:
except Exception:
tokenizer = tiktoken.encoding_for_model("gpt-4o")

enhanced_stats = {}
Expand Down Expand Up @@ -419,7 +417,7 @@ def calculate_best_pass_at_1(query_results):
round_correct = {round_name: 0 for round_name in ["round1", "round2", "round3"]}

for query, results in query_results.items():
for round_name in ["round1", "round2", "round3"]:
for round_name in ["round1", "round2", "round3"]:
if results[round_name] == "Correct":
round_correct[round_name] += 1

Expand Down Expand Up @@ -459,10 +457,10 @@ def main():
args = parser.parse_args()

dataset = args.dataset
if dataset in ["gaia", "webwalker"]:
if dataset in ["gaia", "webwalker"]:
judge_model = "openai/qwen2.5-72b-instruct"
judge_prompt = JUDGE_PROMPT_GAIA
elif dataset in ["xbench-deepsearch"]:
elif dataset in ["xbench-deepsearch"]:
judge_prompt = JUDGE_PROMPT_XBENCH
judge_model = "google/gemini-2.0-flash-001"
elif dataset.startswith("browsecomp_zh"):
Expand Down
3 changes: 2 additions & 1 deletion inference/react_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
from tool_visit import *

OBS_START = '<tool_response>'
OBS_END = '\n</tool_response>'
OBS_END = '
</tool_response>'

MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 100))

Expand Down