diff --git a/.env.sample b/.env.sample
index 080903ca..e48823c2 100644
--- a/.env.sample
+++ b/.env.sample
@@ -13,7 +13,7 @@ OPENAI_EMBED_HOST=azure
 # You also need to `azd auth login` if running this locally
 AZURE_OPENAI_ENDPOINT=https://YOUR-AZURE-OPENAI-SERVICE-NAME.openai.azure.com
 AZURE_OPENAI_VERSION=2024-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT=YOUR-AZURE-DEPLOYMENT-NAME
+AZURE_OPENAI_CHAT_DEPLOYMENT=chat
 AZURE_OPENAI_CHAT_MODEL=gpt-35-turbo
 AZURE_OPENAI_EMBED_DEPLOYMENT=embed
 AZURE_OPENAI_EMBED_MODEL=text-embedding-ada-002
diff --git a/README.md b/README.md
index f311f854..65cef9ec 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ You can run this template virtually by using GitHub Codespaces. The button will
     azd up
     ```

-    This project uses gpt-3.5-turbo and text-embedding-ada-002 which may not be available in all Azure regions. Check for [up-to-date region availability](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability) and select a region during deployment accordingly.
+    You will be asked to select two locations, first a region for most of the resources (Container Apps, PostgreSQL), then a region specifically for the Azure OpenAI models. This project uses the gpt-3.5-turbo (version 0125) and text-embedding-ada-002 models which may not be available in all Azure regions. Check for [up-to-date region availability](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability) and select a region accordingly.

 ### VS Code Dev Containers

diff --git a/src/fastapi_app/api_routes.py b/src/fastapi_app/api_routes.py
index 8aff9b87..8c03dda7 100644
--- a/src/fastapi_app/api_routes.py
+++ b/src/fastapi_app/api_routes.py
@@ -36,32 +36,47 @@ async def similar_handler(id: int, n: int = 5):
     return [item.to_dict() | {"distance": round(distance, 2)} for item, distance in closest]
 
 
+@router.get("/search")
+async def search_handler(query: str, top: int = 5, enable_vector_search: bool = True, enable_text_search: bool = True):
+    """A search API to find items based on a query."""
+    searcher = PostgresSearcher(
+        global_storage.engine,
+        openai_embed_client=global_storage.openai_embed_client,
+        embed_deployment=global_storage.openai_embed_deployment,
+        embed_model=global_storage.openai_embed_model,
+        embed_dimensions=global_storage.openai_embed_dimensions,
+    )
+    results = await searcher.search_and_embed(
+        query, top=top, enable_vector_search=enable_vector_search, enable_text_search=enable_text_search
+    )
+    return [item.to_dict() for item in results]
+
+
 @router.post("/chat")
 async def chat_handler(chat_request: ChatRequest):
     messages = [message.model_dump() for message in chat_request.messages]
     overrides = chat_request.context.get("overrides", {})
 
+    searcher = PostgresSearcher(
+        global_storage.engine,
+        openai_embed_client=global_storage.openai_embed_client,
+        embed_deployment=global_storage.openai_embed_deployment,
+        embed_model=global_storage.openai_embed_model,
+        embed_dimensions=global_storage.openai_embed_dimensions,
+    )
     if overrides.get("use_advanced_flow"):
         ragchat = AdvancedRAGChat(
-            searcher=PostgresSearcher(global_storage.engine),
+            searcher=searcher,
             openai_chat_client=global_storage.openai_chat_client,
             chat_model=global_storage.openai_chat_model,
             chat_deployment=global_storage.openai_chat_deployment,
-            openai_embed_client=global_storage.openai_embed_client,
-            embed_deployment=global_storage.openai_embed_deployment,
-            embed_model=global_storage.openai_embed_model,
-            embed_dimensions=global_storage.openai_embed_dimensions,
         )
     else:
         ragchat = SimpleRAGChat(
-            searcher=PostgresSearcher(global_storage.engine),
+            searcher=searcher,
             openai_chat_client=global_storage.openai_chat_client,
             chat_model=global_storage.openai_chat_model,
             chat_deployment=global_storage.openai_chat_deployment,
-            openai_embed_client=global_storage.openai_embed_client,
-            embed_deployment=global_storage.openai_embed_deployment,
-            embed_model=global_storage.openai_embed_model,
-            embed_dimensions=global_storage.openai_embed_dimensions,
         )
 
     response = await ragchat.run(messages, overrides=overrides)
diff --git a/src/fastapi_app/postgres_searcher.py b/src/fastapi_app/postgres_searcher.py
index 1765a23b..5d13650e 100644
--- a/src/fastapi_app/postgres_searcher.py
+++ b/src/fastapi_app/postgres_searcher.py
@@ -1,13 +1,26 @@
+from openai import AsyncOpenAI
 from pgvector.utils import to_db
 from sqlalchemy import Float, Integer, select, text
 from sqlalchemy.ext.asyncio import async_sessionmaker
 
-from .postgres_models import Item
+from fastapi_app.embeddings import compute_text_embedding
+from fastapi_app.postgres_models import Item
 
 
 class PostgresSearcher:
-    def __init__(self, engine):
+    def __init__(
+        self,
+        engine,
+        openai_embed_client: AsyncOpenAI,
+        embed_deployment: str | None,  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
+        embed_model: str,
+        embed_dimensions: int,
+    ):
         self.async_session_maker = async_sessionmaker(engine, expire_on_commit=False)
+        self.openai_embed_client = openai_embed_client
+        self.embed_model = embed_model
+        self.embed_deployment = embed_deployment
+        self.embed_dimensions = embed_dimensions
 
     def build_filter_clause(self, filters) -> tuple[str, str]:
         if filters is None:
@@ -26,7 +39,7 @@ async def search(
         self,
         query_text: str | None,
         query_vector: list[float] | list,
-        query_top: int = 5,
+        top: int = 5,
         filters: list[dict] | None = None,
     ):
         filter_clause_where, filter_clause_and = self.build_filter_clause(filters)
@@ -83,7 +96,32 @@ async def search(
 
         # Convert results to Item models
         items = []
-        for id, _ in results[:query_top]:
+        for id, _ in results[:top]:
             item = await session.execute(select(Item).where(Item.id == id))
             items.append(item.scalar())
         return items
+
+    async def search_and_embed(
+        self,
+        query_text: str,
+        top: int = 5,
+        enable_vector_search: bool = False,
+        enable_text_search: bool = False,
+        filters: list[dict] | None = None,
+    ) -> list[Item]:
+        """
+        Search items by query text. Optionally converts the query text to a vector if enable_vector_search is True.
+        """
+        vector: list[float] = []
+        if enable_vector_search:
+            vector = await compute_text_embedding(
+                query_text,
+                self.openai_embed_client,
+                self.embed_model,
+                self.embed_deployment,
+                self.embed_dimensions,
+            )
+        if not enable_text_search:
+            query_text = None
+
+        return await self.search(query_text, vector, top, filters)
diff --git a/src/fastapi_app/rag_advanced.py b/src/fastapi_app/rag_advanced.py
index 81a1fd51..426ae996 100644
--- a/src/fastapi_app/rag_advanced.py
+++ b/src/fastapi_app/rag_advanced.py
@@ -11,7 +11,6 @@
 from openai_messages_token_helper import build_messages, get_token_limit
 
 from .api_models import ThoughtStep
-from .embeddings import compute_text_embedding
 from .postgres_searcher import PostgresSearcher
 from .query_rewriter import build_search_function, extract_search_arguments
 
@@ -24,19 +23,11 @@ def __init__(
         openai_chat_client: AsyncOpenAI,
         chat_model: str,
         chat_deployment: str | None,  # Not needed for non-Azure OpenAI
-        openai_embed_client: AsyncOpenAI,
-        embed_deployment: str | None,  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
-        embed_model: str,
-        embed_dimensions: int,
     ):
         self.searcher = searcher
         self.openai_chat_client = openai_chat_client
         self.chat_model = chat_model
         self.chat_deployment = chat_deployment
-        self.openai_embed_client = openai_embed_client
-        self.embed_deployment = embed_deployment
-        self.embed_model = embed_model
-        self.embed_dimensions = embed_dimensions
         self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True)
         current_dir = pathlib.Path(__file__).parent
         self.query_prompt_template = open(current_dir / "prompts/query.txt").read()
@@ -77,19 +68,13 @@ async def run(
         query_text, filters = extract_search_arguments(chat_completion)
 
         # Retrieve relevant items from the database with the GPT optimized query
-        vector: list[float] = []
-        if vector_search:
-            vector = await compute_text_embedding(
-                original_user_query,
-                self.openai_embed_client,
-                self.embed_model,
-                self.embed_deployment,
-                self.embed_dimensions,
-            )
-        if not text_search:
-            query_text = None
-
-        results = await self.searcher.search(query_text, vector, top, filters)
+        results = await self.searcher.search_and_embed(
+            query_text,
+            top=top,
+            enable_vector_search=vector_search,
+            enable_text_search=text_search,
+            filters=filters,
+        )
 
         sources_content = [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results]
         content = "\n".join(sources_content)
diff --git a/src/fastapi_app/rag_simple.py b/src/fastapi_app/rag_simple.py
index fc0864a3..4cf328a7 100644
--- a/src/fastapi_app/rag_simple.py
+++ b/src/fastapi_app/rag_simple.py
@@ -8,7 +8,6 @@
 from openai_messages_token_helper import build_messages, get_token_limit
 
 from .api_models import ThoughtStep
-from .embeddings import compute_text_embedding
 from .postgres_searcher import PostgresSearcher
 
 
@@ -20,19 +19,11 @@ def __init__(
         openai_chat_client: AsyncOpenAI,
         chat_model: str,
         chat_deployment: str | None,  # Not needed for non-Azure OpenAI
-        openai_embed_client: AsyncOpenAI,
-        embed_deployment: str | None,  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
-        embed_model: str,
-        embed_dimensions: int,
     ):
         self.searcher = searcher
         self.openai_chat_client = openai_chat_client
         self.chat_model = chat_model
         self.chat_deployment = chat_deployment
-        self.openai_embed_client = openai_embed_client
-        self.embed_deployment = embed_deployment
-        self.embed_model = embed_model
-        self.embed_dimensions = embed_dimensions
         self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True)
         current_dir = pathlib.Path(__file__).parent
         self.answer_prompt_template = open(current_dir / "prompts/answer.txt").read()
@@ -48,20 +39,9 @@ async def run(
         past_messages = messages[:-1]
 
         # Retrieve relevant items from the database
-        vector: list[float] = []
-        query_text = None
-        if vector_search:
-            vector = await compute_text_embedding(
-                original_user_query,
-                self.openai_embed_client,
-                self.embed_model,
-                self.embed_deployment,
-                self.embed_dimensions,
-            )
-        if text_search:
-            query_text = original_user_query
-
-        results = await self.searcher.search(query_text, vector, top)
+        results = await self.searcher.search_and_embed(
+            original_user_query, top=top, enable_vector_search=vector_search, enable_text_search=text_search
+        )
 
         sources_content = [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results]
         content = "\n".join(sources_content)
@@ -92,7 +72,7 @@ async def run(
             "thoughts": [
                 ThoughtStep(
                     title="Search query for database",
-                    description=query_text,
+                    description=original_user_query if text_search else None,
                     props={
                         "top": top,
                         "vector_search": vector_search,
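
For quick verification, here is a minimal sketch of exercising the new `GET /search` route added in `api_routes.py`. It assumes the FastAPI app is already running and reachable at `http://localhost:8000` with the router mounted without a prefix, and the query text is only an example; none of those details come from the diff itself.

```python
# Minimal sketch: call the new /search endpoint introduced in this change.
# Assumptions (not part of the diff): the app runs at http://localhost:8000
# and the APIRouter has no prefix; the query string is a hypothetical example.
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode(
    {
        "query": "climbing gear",        # hypothetical search text
        "top": 3,                        # number of items to return
        "enable_vector_search": "true",  # embed the query and use vector similarity
        "enable_text_search": "true",    # also run full-text search on the query
    }
)
with urllib.request.urlopen(f"http://localhost:8000/search?{params}") as resp:
    items = json.load(resp)  # the handler returns a list of Item.to_dict() payloads

for item in items:
    print(item)
```

These query parameters map directly onto `PostgresSearcher.search_and_embed`, which is the helper that the new endpoint and both RAG flows (`SimpleRAGChat`, `AdvancedRAGChat`) now share instead of each computing embeddings themselves.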