diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py
index 6e206dfd99b6..3459f24834db 100644
--- a/tests/entrypoints/openai/test_audio.py
+++ b/tests/entrypoints/openai/test_audio.py
@@ -83,7 +83,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=202, total_tokens=212)
+        completion_tokens=10, prompt_tokens=201, total_tokens=211)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -140,7 +140,7 @@ async def test_single_chat_session_audio_base64encoded(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=202, total_tokens=212)
+        completion_tokens=10, prompt_tokens=201, total_tokens=211)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -196,7 +196,7 @@ async def test_single_chat_session_input_audio(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=202, total_tokens=212)
+        completion_tokens=10, prompt_tokens=201, total_tokens=211)
 
     message = choice.message
     message = chat_completion.choices[0].message
diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py
index 029c9b038b04..c954fca696ff 100644
--- a/tests/entrypoints/openai/test_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
@@ -92,7 +92,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=775, total_tokens=785)
+        completion_tokens=10, prompt_tokens=774, total_tokens=784)
 
     message = choice.message
     message = chat_completion.choices[0].message
@@ -185,7 +185,7 @@ async def test_single_chat_session_image_base64encoded(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=775, total_tokens=785)
+        completion_tokens=10, prompt_tokens=774, total_tokens=784)
 
     message = choice.message
     message = chat_completion.choices[0].message
diff --git a/tests/entrypoints/openai/test_vision_embedding.py b/tests/entrypoints/openai/test_vision_embedding.py
index f2ff4a0b07a5..cee5274561f4 100644
--- a/tests/entrypoints/openai/test_vision_embedding.py
+++ b/tests/entrypoints/openai/test_vision_embedding.py
@@ -93,5 +93,5 @@ async def test_image_embedding(server: RemoteOpenAIServer, model_name: str,
     assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 3072
     assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 764
-    assert embeddings.usage.total_tokens == 764
+    assert embeddings.usage.prompt_tokens == 763
+    assert embeddings.usage.total_tokens == 763
diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py
index 035e84cc0633..53f89996f0fe 100644
--- a/vllm/inputs/preprocess.py
+++ b/vllm/inputs/preprocess.py
@@ -260,9 +260,6 @@ def _process_multimodal(
         mm_processor = self.mm_registry.create_processor(
             self.model_config, tokenizer)
 
-        if isinstance(prompt, list):
-            prompt = tokenizer.decode(prompt)
-
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
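
Note on why the expected prompt_tokens values drop by one: _process_multimodal no longer decodes an already-tokenized prompt back to text before handing it to the multimodal processor. One common way such a decode/re-encode round trip inflates the count is that the fresh encode re-applies special-token handling (e.g. prepending a BOS token that was already present in the ids). The snippet below is a minimal illustrative sketch of that effect only; ToyTokenizer and all ids are hypothetical, not vLLM's real tokenizer or the actual counts from these tests.

# Hypothetical toy tokenizer -- not vLLM code -- showing how decoding an
# already-tokenized prompt and re-encoding it can add one extra token.
class ToyTokenizer:
    BOS_ID = 1
    ID_TO_TOKEN = {1: "<s>", 7: "hello", 8: "world"}
    TOKEN_TO_ID = {v: k for k, v in ID_TO_TOKEN.items()}

    def decode(self, token_ids):
        # Render ids back to text, keeping special tokens in the string.
        return " ".join(self.ID_TO_TOKEN[t] for t in token_ids)

    def encode(self, text):
        # Tokenize the text and prepend BOS, as many chat tokenizers do.
        return [self.BOS_ID] + [self.TOKEN_TO_ID[w] for w in text.split()]


tokenizer = ToyTokenizer()
prompt_ids = [1, 7, 8]  # already-tokenized prompt: "<s> hello world"

round_tripped = tokenizer.encode(tokenizer.decode(prompt_ids))
print(len(prompt_ids))     # 3
print(len(round_tripped))  # 4 -- the BOS token is now counted twice

Passing the token ids straight through, as the patched _process_multimodal does, avoids this double counting, which is consistent with the test expectations moving from 202/775/764 to 201/774/763.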