vllm/entrypoints/openai/serving_chat.py (9 changes: 6 additions & 3 deletions)
@@ -505,15 +505,18 @@ async def chat_completion_stream_generator(
                     # any tokens that were generated but previously
                     # matched by partial json parsing
                     # only happens if we are NOT using guided decoding
-                    if tool_parser:
+                    if tool_choice_auto:
+                        assert tool_parser is not None
                         index = len(
                             tool_parser.prev_tool_call_arr) - 1 if len(
                                 tool_parser.prev_tool_call_arr) > 0 else 0
                     else:
                         index = 0
 
-                    if self._should_check_for_unstreamed_tool_arg_tokens(
-                            delta_message, output) and tool_parser:
+                    if tool_choice_auto and \
+                            self._should_check_for_unstreamed_tool_arg_tokens(
+                                delta_message, output):
+                        assert tool_parser is not None
                         # get the expected call based on partial JSON
                         # parsing which "autocompletes" the JSON
                         expected_call = json.dumps(
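
The practical effect of the hunk: the final-delta bookkeeping (picking the index of the last partially parsed tool call and recovering any argument tokens that were never streamed) now keys off `tool_choice_auto` rather than the mere presence of a `tool_parser`, with `assert tool_parser is not None` documenting the invariant. Per the in-code comment, this reconciliation only applies when guided decoding is not in use. Below is a minimal, self-contained sketch of that guard pattern, not the vLLM implementation; `FakeToolParser`, `pick_tool_call_index`, and `recover_unstreamed_args` are illustrative names, not vLLM APIs.

```python
import json
from typing import Any, Optional


class FakeToolParser:
    """Hypothetical stand-in for the streaming tool parser's state."""

    def __init__(self) -> None:
        # Tool calls reconstructed so far via partial JSON parsing.
        self.prev_tool_call_arr: list[dict[str, Any]] = []


def pick_tool_call_index(tool_choice_auto: bool,
                         tool_parser: Optional[FakeToolParser]) -> int:
    # Mirrors the first hunk: with tool_choice="auto" the index points at
    # the most recently (partially) parsed tool call; otherwise it is 0.
    if tool_choice_auto:
        assert tool_parser is not None
        return (len(tool_parser.prev_tool_call_arr) - 1
                if len(tool_parser.prev_tool_call_arr) > 0 else 0)
    return 0


def recover_unstreamed_args(
        tool_choice_auto: bool,
        tool_parser: Optional[FakeToolParser]) -> Optional[str]:
    # Mirrors the second hunk: only when tool_choice="auto" do we compare
    # what was streamed against the parser's "autocompleted" arguments.
    if not tool_choice_auto:
        return None
    assert tool_parser is not None
    if not tool_parser.prev_tool_call_arr:
        return None
    index = len(tool_parser.prev_tool_call_arr) - 1
    return json.dumps(
        tool_parser.prev_tool_call_arr[index].get("arguments", {}))


if __name__ == "__main__":
    parser = FakeToolParser()
    parser.prev_tool_call_arr = [
        {"name": "get_weather", "arguments": {"city": "Oslo"}},
        {"name": "get_time", "arguments": {"tz": "Europe/Oslo"}},
    ]
    print(pick_tool_call_index(True, parser))     # 1
    print(pick_tool_call_index(False, None))      # 0
    print(recover_unstreamed_args(True, parser))  # {"tz": "Europe/Oslo"}
    print(recover_unstreamed_args(False, None))   # None
```

Guarding on `tool_choice_auto` instead of `tool_parser` makes the intent explicit: when a specific tool is forced (guided decoding), the partial-JSON parser state is not consulted at all, and the index simply stays 0.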