Add support for GitHub Models #200

Merged 1 commit on May 1, 2025
13 changes: 10 additions & 3 deletions .env.sample
@@ -5,9 +5,9 @@ POSTGRES_PASSWORD=postgres
POSTGRES_DATABASE=postgres
POSTGRES_SSL=disable

-# OPENAI_CHAT_HOST can be either azure, openai, or ollama:
+# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github:
OPENAI_CHAT_HOST=azure
-# OPENAI_EMBED_HOST can be either azure or openai:
+# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github:
OPENAI_EMBED_HOST=azure
# Needed for Azure:
# You also need to `azd auth login` if running this locally
@@ -28,10 +28,17 @@ AZURE_OPENAI_KEY=
OPENAICOM_KEY=YOUR-OPENAI-API-KEY
OPENAICOM_CHAT_MODEL=gpt-3.5-turbo
OPENAICOM_EMBED_MODEL=text-embedding-3-large
-OPENAICOM_EMBED_MODEL_DIMENSIONS=1024
+OPENAICOM_EMBED_DIMENSIONS=1024
OPENAICOM_EMBEDDING_COLUMN=embedding_3l
# Needed for Ollama:
OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1
OLLAMA_CHAT_MODEL=llama3.1
OLLAMA_EMBED_MODEL=nomic-embed-text
OLLAMA_EMBEDDING_COLUMN=embedding_nomic
+# Needed for GitHub Models:
+GITHUB_TOKEN=YOUR-GITHUB-TOKEN
+GITHUB_BASE_URL=https://models.inference.ai.azure.com
+GITHUB_MODEL=gpt-4o
+GITHUB_EMBED_MODEL=text-embedding-3-large
+GITHUB_EMBED_DIMENSIONS=1024
+GITHUB_EMBEDDING_COLUMN=embedding_3l
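
A quick local sanity check for the new block (a sketch assuming the python-dotenv package; the repo may load the file differently):

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv is installed

load_dotenv()  # reads .env from the current directory

# GITHUB_TOKEN is the only secret GitHub Models needs; the other
# GITHUB_* variables fall back to the defaults used throughout this PR.
if os.getenv("OPENAI_CHAT_HOST") == "github" and not os.getenv("GITHUB_TOKEN"):
    raise RuntimeError("Set GITHUB_TOKEN to use GitHub Models")
```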
4 changes: 4 additions & 0 deletions evals/evaluate.py
@@ -66,6 +66,10 @@ def get_openai_config() -> dict:
        # azure-ai-evaluate will call DefaultAzureCredential behind the scenes,
        # so we must be logged in to Azure CLI with the correct tenant
        openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"]
+    elif os.environ.get("OPENAI_CHAT_HOST") == "ollama":
+        raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
+    elif os.environ.get("OPENAI_CHAT_HOST") == "github":
+        raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
    else:
        logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
        openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"}
4 changes: 3 additions & 1 deletion evals/generate_ground_truth.py
@@ -101,7 +101,9 @@ def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]:
        )
        model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
    elif OPENAI_CHAT_HOST == "ollama":
-        raise NotImplementedError("Ollama OpenAI Service is not supported. Switch to Azure or OpenAI.com")
+        raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com")
+    elif OPENAI_CHAT_HOST == "github":
+        raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com")
    else:
        logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
        openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"])
2 changes: 1 addition & 1 deletion infra/main.bicep
@@ -302,7 +302,7 @@ var webAppEnv = union(azureOpenAIKeyEnv, openAIComKeyEnv, [
    value: openAIEmbedHost
  }
  {
-    name: 'OPENAICOM_EMBED_MODEL_DIMENSIONS'
+    name: 'OPENAICOM_EMBED_DIMENSIONS'
    value: openAIEmbedHost == 'openaicom' ? '1024' : ''
  }
  {
8 changes: 8 additions & 0 deletions src/backend/fastapi_app/dependencies.py
@@ -51,6 +51,11 @@ async def common_parameters():
openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text"
openai_embed_dimensions = None
embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic"
elif OPENAI_EMBED_HOST == "github":
openai_embed_deployment = None
openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large"
openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024))
embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l"
else:
openai_embed_deployment = None
openai_embed_model = os.getenv("OPENAICOM_EMBED_MODEL") or "text-embedding-3-large"
@@ -63,6 +68,9 @@ async def common_parameters():
        openai_chat_deployment = None
        openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL") or "phi3:3.8b"
        openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text"
+    elif OPENAI_CHAT_HOST == "github":
+        openai_chat_deployment = None
+        openai_chat_model = os.getenv("GITHUB_MODEL") or "gpt-4o"
    else:
        openai_chat_deployment = None
        openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo"
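
Restating the new github embedding branch as a standalone helper for clarity (a sketch; the function name is hypothetical and not part of this PR):

```python
import os


def github_embed_settings() -> dict:
    """Mirror of the github branch in common_parameters(), with this PR's defaults."""
    return {
        "deployment": None,  # GitHub Models has no Azure-style deployment name
        "model": os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large",
        "dimensions": int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)),
        "column": os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l",
    }
```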
18 changes: 18 additions & 0 deletions src/backend/fastapi_app/openai_clients.py
@@ -50,6 +50,15 @@ async def create_openai_chat_client(
base_url=os.getenv("OLLAMA_ENDPOINT"),
api_key="nokeyneeded",
)
elif OPENAI_CHAT_HOST == "github":
[Review comment from Copilot AI, May 1, 2025]
[nitpick] Consider documenting the GitHub Models branch in the client setup to clarify why no explicit model parameter is passed during initialization, ensuring future maintainability.

Suggested change:
elif OPENAI_CHAT_HOST == "github":
    # The GitHub Models branch does not require an explicit model parameter during initialization.
    # Instead, it relies on the GITHUB_BASE_URL and GITHUB_MODEL environment variables to configure
    # the base URL and model name, respectively. This design choice ensures flexibility and avoids
    # hardcoding model details in the code.

logger.info("Setting up OpenAI client for chat completions using GitHub Models")
github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com")
github_model = os.getenv("GITHUB_MODEL", "gpt-4o")
logger.info(f"Using GitHub Models with base URL: {github_base_url}, model: {github_model}")
openai_chat_client = openai.AsyncOpenAI(
base_url=github_base_url,
api_key=os.getenv("GITHUB_TOKEN"),
)
    else:
        logger.info("Setting up OpenAI client for chat completions using OpenAI.com API key")
        openai_chat_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY"))
@@ -99,6 +108,15 @@ async def create_openai_embed_client(
base_url=os.getenv("OLLAMA_ENDPOINT"),
api_key="nokeyneeded",
)
elif OPENAI_EMBED_HOST == "github":
logger.info("Setting up OpenAI client for embeddings using GitHub Models")
github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com")
github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-small")
logger.info(f"Using GitHub Models with base URL: {github_base_url}, embedding model: {github_embed_model}")
openai_embed_client = openai.AsyncOpenAI(
base_url=github_base_url,
api_key=os.getenv("GITHUB_TOKEN"),
)
    else:
        logger.info("Setting up OpenAI client for embeddings using OpenAI.com API key")
        openai_embed_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY"))
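
For reference, a minimal usage sketch of the resulting clients against GitHub Models (not part of this PR; model names are the defaults used above, and whether the endpoint honors the dimensions parameter depends on the chosen embedding model):

```python
import asyncio
import os

import openai


async def main() -> None:
    # Same construction as the branches above: base URL from GITHUB_BASE_URL,
    # GitHub token as the API key.
    client = openai.AsyncOpenAI(
        base_url=os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com"),
        api_key=os.getenv("GITHUB_TOKEN"),
    )

    # The model is passed per request, not at client construction,
    # which is why the branches above set no model on the client itself.
    chat = await client.chat.completions.create(
        model=os.getenv("GITHUB_MODEL", "gpt-4o"),
        messages=[{"role": "user", "content": "Say hello"}],
    )
    print(chat.choices[0].message.content)

    # text-embedding-3 models accept a dimensions parameter.
    embed = await client.embeddings.create(
        model=os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-large"),
        input="hello world",
        dimensions=int(os.getenv("GITHUB_EMBED_DIMENSIONS", "1024")),
    )
    print(len(embed.data[0].embedding))


asyncio.run(main())
```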
2 changes: 1 addition & 1 deletion src/backend/fastapi_app/routes/api_routes.py
@@ -68,7 +68,7 @@ async def similar_handler(
f"SELECT *, {context.embedding_column} <=> :embedding as DISTANCE FROM {Item.__tablename__} "
"WHERE id <> :item_id ORDER BY distance LIMIT :n"
),
{"embedding": item.embedding_3l, "n": n, "item_id": id},
{"embedding": getattr(item, context.embedding_column), "n": n, "item_id": id},
)
).fetchall()

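The fix above matters because the active embedding column is chosen at runtime (embedding_3l, embedding_nomic, and now the GitHub Models column), so the attribute lookup cannot be hardcoded. A toy illustration (the Item class here is a hypothetical stand-in for the SQLAlchemy model):

```python
class Item:
    """Hypothetical stand-in for the SQLAlchemy model with two embedding columns."""

    embedding_3l = [0.1, 0.2, 0.3]
    embedding_nomic = [0.4, 0.5, 0.6]


item = Item()
for column in ("embedding_3l", "embedding_nomic"):
    # getattr picks whichever column the current context selects,
    # instead of always reading embedding_3l.
    print(column, getattr(item, column))
```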
2 changes: 2 additions & 0 deletions src/backend/fastapi_app/update_embeddings.py
@@ -29,6 +29,8 @@ async def update_embeddings(in_seed_data=False):
embedding_column = os.getenv("AZURE_OPENAI_EMBEDDING_COLUMN", "embedding_3l")
elif OPENAI_EMBED_HOST == "ollama":
embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN", "embedding_nomic")
elif OPENAI_EMBED_HOST == "github":
embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN", "embedding_3l")
else:
embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l")
logger.info(f"Updating embeddings in column: {embedding_column}")
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -95,7 +95,7 @@ def mock_session_env_openai(monkeypatch_session):
monkeypatch_session.setenv("OPENAICOM_KEY", "fakekey")
monkeypatch_session.setenv("OPENAICOM_CHAT_MODEL", "gpt-3.5-turbo")
monkeypatch_session.setenv("OPENAICOM_EMBED_MODEL", "text-embedding-3-large")
monkeypatch_session.setenv("OPENAICOM_EMBED_MODEL_DIMENSIONS", "1024")
monkeypatch_session.setenv("OPENAICOM_EMBED_DIMENSIONS", "1024")
monkeypatch_session.setenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l")

yield