From 3e43654b38f62890500cab62b903e5a5eab98937 Mon Sep 17 00:00:00 2001
From: Mike Bird
Date: Sat, 22 Jun 2024 11:50:27 -0400
Subject: [PATCH 1/2] update vision to gpt-4o

---
 docs/guides/profiles.mdx                                | 4 ++--
 docs/settings/all-settings.mdx                          | 8 ++++----
 docs/usage/terminal/vision.mdx                          | 2 +-
 interpreter/terminal_interface/profiles/defaults/os.py  | 4 ++--
 .../terminal_interface/profiles/defaults/vision.yaml    | 4 ++--
 tests/test_interpreter.py                               | 4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/guides/profiles.mdx b/docs/guides/profiles.mdx
index 29833086f5..9c020ae775 100644
--- a/docs/guides/profiles.mdx
+++ b/docs/guides/profiles.mdx
@@ -18,9 +18,9 @@ from interpreter import interpreter
 
 interpreter.os = True
 interpreter.llm.supports_vision = True
-interpreter.llm.model = "gpt-4-vision-preview"
+interpreter.llm.model = "gpt-4o"
 
-interpreter.llm.supports_functions = False
+interpreter.llm.supports_functions = True
 interpreter.llm.context_window = 110000
 interpreter.llm.max_tokens = 4096
 interpreter.auto_run = True
diff --git a/docs/settings/all-settings.mdx b/docs/settings/all-settings.mdx
index e849aa2eab..579c76f6d3 100644
--- a/docs/settings/all-settings.mdx
+++ b/docs/settings/all-settings.mdx
@@ -280,7 +280,7 @@ llm:
 
 ### Vision Mode
 
-Enables vision mode, which adds some special instructions to the prompt and switches to `gpt-4-vision-preview`.
+Enables vision mode, which adds some special instructions to the prompt and switches to `gpt-4o`.
 
 ```bash Terminal
@@ -288,9 +288,9 @@ interpreter --vision
 ```
 
 ```python Python
-interpreter.llm.model = "gpt-4-vision-preview" # Any vision supporting model
+interpreter.llm.model = "gpt-4o" # Any vision supporting model
 interpreter.llm.supports_vision = True
-interpreter.llm.supports_functions = False # If model doesn't support functions, which is the case with gpt-4-vision.
+interpreter.llm.supports_functions = True
 interpreter.custom_instructions = """The user will show you an image of the code you write. You can view images directly.
 For HTML: This will be run STATELESSLY. You may NEVER write '<!-- previous code here... --!>' or `<!-- header will go here -->` or anything like that. It is CRITICAL TO NEVER WRITE PLACEHOLDERS. Placeholders will BREAK it. You must write the FULL HTML CODE EVERY TIME. Therefore you cannot write HTML piecemeal—write all the HTML, CSS, and possibly Javascript **in one step, in one code block**. The user will help you review it visually.
@@ -302,7 +302,7 @@ If you use `plt.show()`, the resulting image will be sent to you. However, if yo
 loop: True
 
 llm:
-  model: "gpt-4-vision-preview"
+  model: "gpt-4o"
   temperature: 0
   supports_vision: True
   supports_functions: False
diff --git a/docs/usage/terminal/vision.mdx b/docs/usage/terminal/vision.mdx
index 84899162bc..b136381d72 100644
--- a/docs/usage/terminal/vision.mdx
+++ b/docs/usage/terminal/vision.mdx
@@ -8,4 +8,4 @@ To use vision (highly experimental), run the following command:
 interpreter --vision
 ```
 
-If a file path to an image is found in your input, it will be loaded into the vision model (`gpt-4-vision-preview` for now).
+If a file path to an image is found in your input, it will be loaded into the vision model (`gpt-4o` for now).
diff --git a/interpreter/terminal_interface/profiles/defaults/os.py b/interpreter/terminal_interface/profiles/defaults/os.py
index 8186d7ae49..200fab2cd9 100644
--- a/interpreter/terminal_interface/profiles/defaults/os.py
+++ b/interpreter/terminal_interface/profiles/defaults/os.py
@@ -6,11 +6,11 @@
 interpreter.llm.supports_vision = True
 # interpreter.shrink_images = True # Faster but less accurate
 
-interpreter.llm.model = "gpt-4-vision-preview"
+interpreter.llm.model = "gpt-4o"
 
 interpreter.computer.import_computer_api = True
 
-interpreter.llm.supports_functions = False
+interpreter.llm.supports_functions = True
 interpreter.llm.context_window = 110000
 interpreter.llm.max_tokens = 4096
 interpreter.auto_run = True
diff --git a/interpreter/terminal_interface/profiles/defaults/vision.yaml b/interpreter/terminal_interface/profiles/defaults/vision.yaml
index 4e6a6d2c2e..19a9561c8b 100644
--- a/interpreter/terminal_interface/profiles/defaults/vision.yaml
+++ b/interpreter/terminal_interface/profiles/defaults/vision.yaml
@@ -3,10 +3,10 @@
 loop: True
 
 llm:
-  model: "gpt-4-vision-preview"
+  model: "gpt-4o"
   temperature: 0
   supports_vision: True
-  supports_functions: False
+  supports_functions: True
   context_window: 110000
   max_tokens: 4096
   custom_instructions: >
diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py
index ef4fa40580..d05100ac9c 100644
--- a/tests/test_interpreter.py
+++ b/tests/test_interpreter.py
@@ -662,9 +662,9 @@ def test_vision():
     ]
 
     interpreter.llm.supports_vision = True
-    interpreter.llm.model = "gpt-4-vision-preview"
+    interpreter.llm.model = "gpt-4o"
     interpreter.system_message += "\nThe user will show you an image of the code you write. You can view images directly.\n\nFor HTML: This will be run STATELESSLY. You may NEVER write '<!-- previous code here... --!>' or `<!-- header will go here -->` or anything like that. It is CRITICAL TO NEVER WRITE PLACEHOLDERS. Placeholders will BREAK it. You must write the FULL HTML CODE EVERY TIME. Therefore you cannot write HTML piecemeal—write all the HTML, CSS, and possibly Javascript **in one step, in one code block**. The user will help you review it visually.\nIf the user submits a filepath, you will also see the image. The filepath and user image will both be in the user's message.\n\nIf you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you."
-    interpreter.llm.supports_functions = False
+    interpreter.llm.supports_functions = True
     interpreter.llm.context_window = 110000
     interpreter.llm.max_tokens = 4096
     interpreter.loop = True

From 39012e84613b9a625434a0c593d8c4355cde9cea Mon Sep 17 00:00:00 2001
From: Mike Bird
Date: Sat, 22 Jun 2024 11:52:12 -0400
Subject: [PATCH 2/2] gpt-4o supports function calling

---
 docs/settings/all-settings.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/settings/all-settings.mdx b/docs/settings/all-settings.mdx
index 579c76f6d3..a8aa9f66b4 100644
--- a/docs/settings/all-settings.mdx
+++ b/docs/settings/all-settings.mdx
@@ -305,7 +305,7 @@ llm:
   model: "gpt-4o"
   temperature: 0
   supports_vision: True
-  supports_functions: False
+  supports_functions: True
   context_window: 110000
   max_tokens: 4096
   custom_instructions: >