From 3e43654b38f62890500cab62b903e5a5eab98937 Mon Sep 17 00:00:00 2001
From: Mike Bird
Date: Sat, 22 Jun 2024 11:50:27 -0400
Subject: [PATCH 1/2] update vision to gpt-4o

---
 docs/guides/profiles.mdx                                | 4 ++--
 docs/settings/all-settings.mdx                          | 8 ++++----
 docs/usage/terminal/vision.mdx                          | 2 +-
 interpreter/terminal_interface/profiles/defaults/os.py  | 4 ++--
 .../terminal_interface/profiles/defaults/vision.yaml    | 4 ++--
 tests/test_interpreter.py                               | 4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/guides/profiles.mdx b/docs/guides/profiles.mdx
index 29833086f5..9c020ae775 100644
--- a/docs/guides/profiles.mdx
+++ b/docs/guides/profiles.mdx
@@ -18,9 +18,9 @@ from interpreter import interpreter
 
 interpreter.os = True
 interpreter.llm.supports_vision = True
-interpreter.llm.model = "gpt-4-vision-preview"
+interpreter.llm.model = "gpt-4o"
 
-interpreter.llm.supports_functions = False
+interpreter.llm.supports_functions = True
 interpreter.llm.context_window = 110000
 interpreter.llm.max_tokens = 4096
 interpreter.auto_run = True
diff --git a/docs/settings/all-settings.mdx b/docs/settings/all-settings.mdx
index e849aa2eab..579c76f6d3 100644
--- a/docs/settings/all-settings.mdx
+++ b/docs/settings/all-settings.mdx
@@ -280,7 +280,7 @@ llm:
 
 ### Vision Mode
 
-Enables vision mode, which adds some special instructions to the prompt and switches to `gpt-4-vision-preview`.
+Enables vision mode, which adds some special instructions to the prompt and switches to `gpt-4o`.
 
 ```bash Terminal
@@ -288,9 +288,9 @@ interpreter --vision
 ```
 
 ```python Python
-interpreter.llm.model = "gpt-4-vision-preview" # Any vision supporting model
+interpreter.llm.model = "gpt-4o" # Any vision supporting model
 interpreter.llm.supports_vision = True
-interpreter.llm.supports_functions = False # If model doesn't support functions, which is the case with gpt-4-vision.
+interpreter.llm.supports_functions = True
 interpreter.custom_instructions = """The user will show you an image of the code you write. You can view images directly.
 For HTML: This will be run STATELESSLY. You may NEVER write '<!-- previous code here... --!>' or `<!-- header will go here -->` or anything like that. It is CRITICAL TO NEVER WRITE PLACEHOLDERS. Placeholders will BREAK it. You must write the FULL HTML CODE EVERY TIME. Therefore you cannot write HTML piecemeal—write all the HTML, CSS, and possibly Javascript **in one step, in one code block**. The user will help you review it visually.
@@ -302,7 +302,7 @@ If you use `plt.show()`, the resulting image will be sent to you. However, if yo
 loop: True
 
 llm:
-  model: "gpt-4-vision-preview"
+  model: "gpt-4o"
   temperature: 0
   supports_vision: True
   supports_functions: False
diff --git a/docs/usage/terminal/vision.mdx b/docs/usage/terminal/vision.mdx
index 84899162bc..b136381d72 100644
--- a/docs/usage/terminal/vision.mdx
+++ b/docs/usage/terminal/vision.mdx
@@ -8,4 +8,4 @@ To use vision (highly experimental), run the following command:
 interpreter --vision
 ```
 
-If a file path to an image is found in your input, it will be loaded into the vision model (`gpt-4-vision-preview` for now).
+If a file path to an image is found in your input, it will be loaded into the vision model (`gpt-4o` for now).
diff --git a/interpreter/terminal_interface/profiles/defaults/os.py b/interpreter/terminal_interface/profiles/defaults/os.py
index 8186d7ae49..200fab2cd9 100644
--- a/interpreter/terminal_interface/profiles/defaults/os.py
+++ b/interpreter/terminal_interface/profiles/defaults/os.py
@@ -6,11 +6,11 @@
 interpreter.llm.supports_vision = True
 # interpreter.shrink_images = True # Faster but less accurate
 
-interpreter.llm.model = "gpt-4-vision-preview"
+interpreter.llm.model = "gpt-4o"
 
 interpreter.computer.import_computer_api = True
 
-interpreter.llm.supports_functions = False
+interpreter.llm.supports_functions = True
 interpreter.llm.context_window = 110000
 interpreter.llm.max_tokens = 4096
 interpreter.auto_run = True
diff --git a/interpreter/terminal_interface/profiles/defaults/vision.yaml b/interpreter/terminal_interface/profiles/defaults/vision.yaml
index 4e6a6d2c2e..19a9561c8b 100644
--- a/interpreter/terminal_interface/profiles/defaults/vision.yaml
+++ b/interpreter/terminal_interface/profiles/defaults/vision.yaml
@@ -3,10 +3,10 @@
 loop: True
 
 llm:
-  model: "gpt-4-vision-preview"
+  model: "gpt-4o"
   temperature: 0
   supports_vision: True
-  supports_functions: False
+  supports_functions: True
   context_window: 110000
   max_tokens: 4096
   custom_instructions: >
diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py
index ef4fa40580..d05100ac9c 100644
--- a/tests/test_interpreter.py
+++ b/tests/test_interpreter.py
@@ -662,9 +662,9 @@ def test_vision():
     ]
 
     interpreter.llm.supports_vision = True
-    interpreter.llm.model = "gpt-4-vision-preview"
+    interpreter.llm.model = "gpt-4o"
     interpreter.system_message += "\nThe user will show you an image of the code you write. You can view images directly.\n\nFor HTML: This will be run STATELESSLY. You may NEVER write '<!-- previous code here... --!>' or `<!-- header will go here -->` or anything like that. It is CRITICAL TO NEVER WRITE PLACEHOLDERS. Placeholders will BREAK it. You must write the FULL HTML CODE EVERY TIME. Therefore you cannot write HTML piecemeal—write all the HTML, CSS, and possibly Javascript **in one step, in one code block**. The user will help you review it visually.\nIf the user submits a filepath, you will also see the image. The filepath and user image will both be in the user's message.\n\nIf you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you."
-    interpreter.llm.supports_functions = False
+    interpreter.llm.supports_functions = True
     interpreter.llm.context_window = 110000
     interpreter.llm.max_tokens = 4096
     interpreter.loop = True

From 39012e84613b9a625434a0c593d8c4355cde9cea Mon Sep 17 00:00:00 2001
From: Mike Bird
Date: Sat, 22 Jun 2024 11:52:12 -0400
Subject: [PATCH 2/2] gpt-4o supports function calling

---
 docs/settings/all-settings.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/settings/all-settings.mdx b/docs/settings/all-settings.mdx
index 579c76f6d3..a8aa9f66b4 100644
--- a/docs/settings/all-settings.mdx
+++ b/docs/settings/all-settings.mdx
@@ -305,7 +305,7 @@ llm:
   model: "gpt-4o"
   temperature: 0
   supports_vision: True
-  supports_functions: False
+  supports_functions: True
   context_window: 110000
   max_tokens: 4096
   custom_instructions: >