Remove hardcoded model list from OpenAIProvider file (#8649)

chenmoneygithub · ZachParent · web-flow · commit d836fd8f3ae5 · 2025-08-14T10:51:11.000-04:00
* update openai model list

* remove hardcoded list

* increment

* revert unexpected formatting

* style

---------

Co-authored-by: Zach Parent <zachparent@duck.com>
diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py
@@ -188,8 +188,12 @@ def finetune(
     ) -> TrainingJob:
         from dspy import settings as settings
 
-        err = f"Provider {self.provider} does not support fine-tuning."
-        assert self.provider.finetunable, err
+        if not self.provider.finetunable:
+            raise ValueError(
+                f"Provider {self.provider} does not support fine-tuning, please specify your provider by explicitly "
+                "setting `provider` when creating the `dspy.LM` instance. For example, "
+                "`dspy.LM('openai/gpt-4.1-mini-2025-04-14', provider=dspy.OpenAIProvider())`."
+            )
 
         def thread_function_wrapper():
             return self._run_finetune_job(job)
diff --git a/dspy/clients/openai.py b/dspy/clients/openai.py
@@ -1,4 +1,3 @@
-import re
 import time
 from datetime import datetime
 from typing import Any
@@ -8,48 +7,6 @@
 from dspy.clients.provider import Provider, TrainingJob
 from dspy.clients.utils_finetune import TrainDataFormat, TrainingStatus, save_data
 
-_OPENAI_MODELS = [
-    "gpt-4-turbo",
-    "gpt-4-turbo-2024-04-09",
-    "tts-1",
-    "tts-1-1106",
-    "chatgpt-4o-latest",
-    "dall-e-2",
-    "whisper-1",
-    "gpt-3.5-turbo-instruct",
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-0125",
-    "babbage-002",
-    "davinci-002",
-    "gpt-4o-mini-2024-07-18",
-    "gpt-4o",
-    "dall-e-3",
-    "gpt-4o-mini",
-    "gpt-4o-2024-08-06",
-    "gpt-4o-2024-05-13",
-    "o1-preview",
-    "gpt-4o-audio-preview-2024-10-01",
-    "o1-mini-2024-09-12",
-    "gpt-4o-audio-preview",
-    "tts-1-hd",
-    "tts-1-hd-1106",
-    "o1-preview-2024-09-12",
-    "o1-mini",
-    "gpt-4-1106-preview",
-    "text-embedding-ada-002",
-    "gpt-3.5-turbo-16k",
-    "text-embedding-3-small",
-    "text-embedding-3-large",
-    "gpt-4o-realtime-preview-2024-10-01",
-    "gpt-4o-realtime-preview",
-    "gpt-3.5-turbo-1106",
-    "gpt-4-0613",
-    "gpt-4-turbo-preview",
-    "gpt-4-0125-preview",
-    "gpt-4",
-    "gpt-3.5-turbo-instruct-0914",
-]
-
 
 class TrainingJobOpenAI(TrainingJob):
     def __init__(self, *args, **kwargs):
@@ -90,24 +47,9 @@ def __init__(self):
 
     @staticmethod
     def is_provider_model(model: str) -> bool:
-        model = OpenAIProvider._remove_provider_prefix(model)
-
-        # Check if the model is a base OpenAI model
-        # TODO(enhance) The following list can be replaced with
-        # openai.models.list(), but doing so might require a key. Is there a
-        # way to get the list of models without a key?
-        if model in _OPENAI_MODELS:
-            return True
-
-        # Check if the model is a fine-tuned OpenAI model. Fine-tuned OpenAI
-        # models have the prefix "ft:<BASE_MODEL_NAME>:", followed by a string
-        # specifying the fine-tuned model. The following RegEx pattern is used
-        # to match the base model name.
-        # TODO(enhance): This part can be updated to match the actual fine-tuned
-        # model names by making a call to the OpenAI API to be more exact, but
-        # this might require an API key with the right permissions.
-        match = re.match(r"ft:([^:]+):", model)
-        if match and match.group(1) in _OPENAI_MODELS:
+        if model.startswith("openai/") or model.startswith("ft:"):
+            # Although it looks strange, `ft:` is a unique identifier for openai finetuned models in litellm context:
+            # https://github.com/BerriAI/litellm/blob/cd893134b7974d9f21477049a373b469fff747a5/litellm/utils.py#L4495
             return True
 
         return False