Skip to content

Rohitganguly docs #209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
# RAG on PostgreSQL
# RAG on PostgreSQL (PyCon US 2025 Demo!)

> This repository is used in the Microsoft sponsor session at PyCon US 2025 by Rohit Ganguly and Pamela Fox.
> To see the original repository that this one was forked from, visit [Pamela's original repository](https://github.com/Azure-Samples/rag-postgres-openai-python)!

## Resource links used in the presentation:

### VS Code Extensions
- [Python](https://marketplace.visualstudio.com/items?itemName=ms-python.python)
- [GitHub Copilot](https://marketplace.visualstudio.com/items?itemName=GitHub.copilot)
- [GitHub Copilot for Azure](https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-azure-github-copilot)
- [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
- [The (new!) PostgreSQL Extension (link coming soon!)](#)
- [AI Toolkit](https://marketplace.visualstudio.com/items?itemName=ms-windows-ai-studio.windows-ai-studio)

### Azure Services & Tools
- [Azure Developer CLI](https://aka.ms/azd)
- [Azure Container Apps](https://aka.ms/acadocs)
- [Azure Database for PostgreSQL](https://aka.ms/postgresdocs)
- [Azure AI Foundry](https://aka.ms/aifoundrydocs)


[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/Azure-Samples/rag-postgres-openai-python)
[![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/rag-postgres-openai-python)
Expand Down
51 changes: 51 additions & 0 deletions convert_csv_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import ast
import csv
import json

# Convert data.csv into data.json, turning cell text into typed JSON values
# (lists, booleans, numbers) and dropping the "is_open" column.


def parse_list_literal(text: str) -> object:
    """Parse a cell that looks like a list (``[...]``).

    The text is first tried as JSON after replacing single quotes with
    double quotes, then as a Python literal. If both attempts fail the
    original text is returned unchanged.
    """
    try:
        # Python-style single-quoted lists become valid JSON once requoted.
        return json.loads(text.replace("'", '"'))
    except (ValueError, SyntaxError):
        pass
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # TypeError covers literals such as "[{[1]: 2}]" (unhashable dict key),
        # which would otherwise abort the whole conversion.
        return text


def convert_value(raw: str) -> object:
    """Convert one CSV cell into a list, bool, int, float or stripped string.

    Checks are applied in this order: list-like text, ``true``/``false``
    (case-insensitive), unsigned integers, unsigned decimals with a single
    dot. Anything else is returned as the stripped string.
    """
    text = raw.strip()
    if text.startswith("[") and text.endswith("]"):
        return parse_list_literal(text)

    lowered = text.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    if text.isdigit():
        caster = int
    elif text.replace(".", "", 1).isdigit():
        # Removing at most one dot leaves only digits, so there is exactly one dot.
        caster = float
    else:
        return text

    try:
        return caster(text)
    except ValueError:
        # str.isdigit() also accepts characters such as "²" that int()/float()
        # reject; keep those cells as plain text instead of crashing.
        return text


def row_to_item(header: list, row: list) -> dict:
    """Build one record from a header row and a data row.

    Columns beyond the end of a short row are omitted. The ``is_open``
    column is removed when present.
    """
    item = {column: convert_value(cell) for column, cell in zip(header, row)}
    # pop() instead of del: the column may be missing or the row may be short.
    item.pop("is_open", None)
    return item


def convert_csv_to_json(csv_path: str = "data.csv", json_path: str = "data.json") -> int:
    """Read ``csv_path``, write the converted records to ``json_path``.

    Returns the number of records written. An empty CSV file produces an
    empty JSON list; completely blank lines are skipped.
    """
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(csv_path, encoding="utf-8", newline="") as csv_file:
        reader = csv.reader(csv_file, quoting=csv.QUOTE_ALL, doublequote=True, escapechar="\\")
        header = next(reader, [])  # Empty file: no header, no rows.
        json_data = [row_to_item(header, row) for row in reader if row]

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4, ensure_ascii=False)

    return len(json_data)


def main() -> None:
    """Convert data.csv to data.json in the current directory and report the count."""
    record_count = convert_csv_to_json()
    print(f"Successfully converted CSV data to JSON format with {record_count} records")


if __name__ == "__main__":
    main()
33 changes: 21 additions & 12 deletions src/backend/fastapi_app/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,21 @@ class ChatRequest(BaseModel):

class ItemPublic(BaseModel):
id: int
type: str
brand: str
name: str
location: str
cuisine: str
rating: int
price_level: int
review_count: int
hours: str
tags: list[str]
description: str
price: float
menu_summary: str
top_reviews: str
vibe: str

def to_str_for_rag(self):
return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}" # noqa: E501


class ItemWithDistance(ItemPublic):
Expand Down Expand Up @@ -105,16 +112,18 @@ class Filter(BaseModel):
value: Any


class PriceFilter(Filter):
column: str = Field(default="price", description="The column to filter on (always 'price' for this filter)")
comparison_operator: str = Field(description="The operator for price comparison ('>', '<', '>=', '<=', '=')")
value: float = Field(description="The price value to compare against (e.g., 30.00)")
# Filter that restricts search results by the `price_level` column.
class PriceLevelFilter(Filter):
# Column the filter applies to; defaults to "price_level".
column: str = Field(
default="price_level", description="The column to filter on (always 'price_level' for this filter)"
)
# Comparison operator, carried as text (one of the symbols listed in the description).
comparison_operator: str = Field(description="The operator for price level comparison ('>', '<', '>=', '<=', '=')")
# NOTE(review): annotated as float although the description lists whole levels 1-4 — confirm this is intended.
value: float = Field(description="Value to compare against, either 1, 2, 3, 4")


class BrandFilter(Filter):
column: str = Field(default="brand", description="The column to filter on (always 'brand' for this filter)")
comparison_operator: str = Field(description="The operator for brand comparison ('=' or '!=')")
value: str = Field(description="The brand name to compare against (e.g., 'AirStrider')")
# Filter that restricts search results by the `rating` column.
class RatingFilter(Filter):
    # Column the filter applies to; defaults to "rating".
    column: str = Field(default="rating", description="The column to filter on (always 'rating' for this filter)")
    # Comparison operator, carried as text (one of the symbols listed in the description).
    comparison_operator: str = Field(description="The operator for rating comparison ('>', '<', '>=', '<=', '=')")
    # Allowed values include 5, in line with the 0-5 range advertised by the search tool schema.
    # NOTE(review): kept as str for compatibility although ratings are declared as int elsewhere — confirm.
    value: str = Field(description="Value to compare against, either 0 1 2 3 4 5")


class SearchResults(BaseModel):
Expand Down
21 changes: 15 additions & 6 deletions src/backend/fastapi_app/postgres_models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

from pgvector.sqlalchemy import Vector
from sqlalchemy import Index
from sqlalchemy import VARCHAR, Index
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


Expand All @@ -13,11 +14,19 @@ class Base(DeclarativeBase):
class Item(Base):
__tablename__ = "items"
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
type: Mapped[str] = mapped_column()
brand: Mapped[str] = mapped_column()
name: Mapped[str] = mapped_column()
location: Mapped[str] = mapped_column()
cuisine: Mapped[str] = mapped_column()
rating: Mapped[int] = mapped_column()
price_level: Mapped[int] = mapped_column()
review_count: Mapped[int] = mapped_column()
hours: Mapped[str] = mapped_column()
tags: Mapped[list[str]] = mapped_column(postgresql.ARRAY(VARCHAR)) # Array of strings
description: Mapped[str] = mapped_column()
price: Mapped[float] = mapped_column()
menu_summary: Mapped[str] = mapped_column()
top_reviews: Mapped[str] = mapped_column()
vibe: Mapped[str] = mapped_column()

# Embeddings for different models:
embedding_3l: Mapped[Vector] = mapped_column(Vector(1024), nullable=True) # text-embedding-3-large
embedding_nomic: Mapped[Vector] = mapped_column(Vector(768), nullable=True) # nomic-embed-text
Expand All @@ -33,10 +42,10 @@ def to_dict(self, include_embedding: bool = False):
return model_dict

def to_str_for_rag(self):
return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}" # noqa: E501

def to_str_for_embedding(self):
return f"Name: {self.name} Description: {self.description} Type: {self.type}"
return f"Name: {self.name} Description: {self.description} Cuisine: {self.cuisine} Tags: {self.tags} Menu Summary: {self.menu_summary} Top Reviews: {self.top_reviews} Vibe: {self.vibe}" # noqa: E501


"""
Expand Down
14 changes: 7 additions & 7 deletions src/backend/fastapi_app/prompts/answer.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Assistant helps customers with questions about products.
Respond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.
Answer ONLY with the product details listed in the products.
Assistant helps PyCon attendees with questions about restaurants.
Respond as if you are a conference volunteer. Do NOT respond with tables.
Answer ONLY with the restaurant details listed in the sources.
If there isn't enough information below, say you don't know.
Do not generate answers that don't use the sources below.
Each product has an ID in brackets followed by colon and the product details.
Always include the product ID for each product you use in the response.
Use square brackets to reference the source, for example [52].
Don't combine citations, list each product separately, for example [27][51].
Each restaurant has an ID in brackets followed by colon and the restaurant details.
Always include the restaurant ID for each restaurant you reference in the response.
Use square brackets to reference the restaurant, for example [52].
Don't combine references, cite each restaurant separately, for example [27][51].
4 changes: 2 additions & 2 deletions src/backend/fastapi_app/prompts/query.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Your job is to find search results based off the user's question and past messages.
You have access to only these tools:
1. **search_database**: This tool allows you to search a table for items based on a query.
1. **search_database**: This tool allows you to search a table for restaurants based on a query.
You can pass in a search query and optional filters.
Once you get the search results, you're done.
Once you get the search results, you're done.
12 changes: 6 additions & 6 deletions src/backend/fastapi_app/prompts/query_fewshots.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{
"parts": [
{
"content": "good options for climbing gear that can be used outside?",
"content": "good options for ethiopian restaurants?",
"timestamp": "2025-05-07T19:02:46.977501Z",
"part_kind": "user-prompt"
}
Expand All @@ -14,7 +14,7 @@
"parts": [
{
"tool_name": "search_database",
"args": "{\"search_query\":\"climbing gear outside\"}",
"args": "{\"search_query\":\"ethiopian\"}",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"part_kind": "tool-call"
}
Expand All @@ -27,7 +27,7 @@
"parts": [
{
"tool_name": "search_database",
"content": "Search results for climbing gear that can be used outside: ...",
"content": "Search results for ethiopian: ...",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"timestamp": "2025-05-07T19:02:48.242408Z",
"part_kind": "tool-return"
Expand All @@ -39,7 +39,7 @@
{
"parts": [
{
"content": "are there any shoes less than $50?",
"content": "are there any inexpensive chinese restaurants?",
"timestamp": "2025-05-07T19:02:46.977501Z",
"part_kind": "user-prompt"
}
Expand All @@ -51,7 +51,7 @@
"parts": [
{
"tool_name": "search_database",
"args": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
"args": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"part_kind": "tool-call"
}
Expand All @@ -64,7 +64,7 @@
"parts": [
{
"tool_name": "search_database",
"content": "Search results for shoes cheaper than 50: ...",
"content": "Search results for chinese: ...",
"tool_call_id": "call_4HeBCmo2uioV6CyoePEGyZPc",
"timestamp": "2025-05-07T19:02:48.242408Z",
"part_kind": "tool-return"
Expand Down
42 changes: 23 additions & 19 deletions src/backend/fastapi_app/query_rewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,39 +12,39 @@ def build_search_function() -> list[ChatCompletionToolParam]:
"type": "function",
"function": {
"name": "search_database",
"description": "Search PostgreSQL database for relevant products based on user query",
"description": "Search PostgreSQL database for relevant restaurants based on user query",
"parameters": {
"type": "object",
"properties": {
"search_query": {
"type": "string",
"description": "Query string to use for full text search, e.g. 'red shoes'",
},
"price_filter": {
"price_level_filter": {
"type": "object",
"description": "Filter search results based on price of the product",
"description": "Filter search results to a certain price level (from 1 $ to 4 $$$$, with 4 being most costly)", # noqa: E501
"properties": {
"comparison_operator": {
"type": "string",
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa: E501
},
"value": {
"type": "number",
"description": "Value to compare against, e.g. 30",
"description": "Value to compare against, either 1, 2, 3, 4",
},
},
},
"brand_filter": {
"rating_filter": {
"type": "object",
"description": "Filter search results based on brand of the product",
"description": "Filter search results based on ratings of restaurant (from 1 to 5 stars, with 5 the best)", # noqa: E501
"properties": {
"comparison_operator": {
"type": "string",
"description": "Operator to compare the column value, either '=' or '!='",
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa: E501
},
"value": {
"type": "string",
"description": "Value to compare against, e.g. AirStrider",
"description": "Value to compare against, either 0 1 2 3 4 5",
},
},
},
Expand All @@ -69,22 +69,26 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp
arg = json.loads(function.arguments)
# Even though its required, search_query is not always specified
search_query = arg.get("search_query", original_user_query)
if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict):
price_filter = arg["price_filter"]
if (
"price_level_filter" in arg
and arg["price_level_filter"]
and isinstance(arg["price_level_filter"], dict)
):
price_level_filter = arg["price_level_filter"]
filters.append(
{
"column": "price",
"comparison_operator": price_filter["comparison_operator"],
"value": price_filter["value"],
"column": "price_level",
"comparison_operator": price_level_filter["comparison_operator"],
"value": price_level_filter["value"],
}
)
if "brand_filter" in arg and arg["brand_filter"] and isinstance(arg["brand_filter"], dict):
brand_filter = arg["brand_filter"]
if "rating_filter" in arg and arg["rating_filter"] and isinstance(arg["rating_filter"], dict):
rating_filter = arg["rating_filter"]
filters.append(
{
"column": "brand",
"comparison_operator": brand_filter["comparison_operator"],
"value": brand_filter["value"],
"column": "rating",
"comparison_operator": rating_filter["comparison_operator"],
"value": rating_filter["value"],
}
)
elif query_text := response_message.content:
Expand Down
8 changes: 4 additions & 4 deletions src/backend/fastapi_app/rag_advanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

from fastapi_app.api_models import (
AIChatRoles,
BrandFilter,
ChatRequestOverrides,
Filter,
ItemPublic,
Message,
PriceFilter,
PriceLevelFilter,
RAGContext,
RatingFilter,
RetrievalResponse,
RetrievalResponseDelta,
SearchResults,
Expand Down Expand Up @@ -75,8 +75,8 @@ async def search_database(
self,
ctx: RunContext[ChatParams],
search_query: str,
price_filter: Optional[PriceFilter] = None,
brand_filter: Optional[BrandFilter] = None,
price_filter: Optional[PriceLevelFilter] = None,
brand_filter: Optional[RatingFilter] = None,
) -> SearchResults:
"""
Search PostgreSQL database for relevant products based on user query
Expand Down
Loading
Loading