Merge pull request #1 from rohit-ganguly/restaurantdata

pamelafox · web-flow · commit 99a1589c3a41 · 2025-05-06T23:37:28.000-07:00
Bring in sample data from Pittsburgh restaurants
diff --git a/convert_csv_json.py b/convert_csv_json.py
@@ -0,0 +1,89 @@
+"""Convert data.csv to data.json, inferring a JSON type for every cell."""
+
+import ast
+import csv
+import json
+
+INPUT_PATH = "data.csv"
+OUTPUT_PATH = "data.json"
+# CSV columns that are deliberately left out of the JSON output.
+DROPPED_COLUMNS = ("is_open",)
+
+
+def parse_list(text: str) -> object:
+    """Parse a bracketed cell into a Python list.
+
+    Strict JSON is tried first, then a Python literal. Swapping ' for " is
+    only a last resort because it rewrites apostrophes inside strings. The
+    text is returned unchanged when every strategy fails.
+    """
+    try:
+        return json.loads(text)
+    except ValueError:
+        pass
+    try:
+        return ast.literal_eval(text)
+    except (ValueError, SyntaxError, TypeError):
+        pass
+    try:
+        return json.loads(text.replace("'", '"'))
+    except ValueError:
+        return text
+
+
+def coerce_scalar(text: str) -> object:
+    """Return text as a bool, int or float when it looks like one, else as-is."""
+    lowered = text.lower()
+    if lowered == "true":
+        return True
+    if lowered == "false":
+        return False
+    # Accept one leading minus sign so negative numbers are converted too.
+    digits = text[1:] if text.startswith("-") else text
+    # isdecimal() (unlike isdigit()) rejects characters such as a superscript
+    # two, which int() and float() cannot parse.
+    if digits.isdecimal():
+        return int(text)
+    if digits.replace(".", "", 1).isdecimal():
+        return float(text)
+    return text
+
+
+def convert_value(raw: str) -> object:
+    """Strip a raw CSV cell and convert it to the most specific JSON type."""
+    text = raw.strip()
+    if text.startswith("[") and text.endswith("]"):
+        return parse_list(text)
+    return coerce_scalar(text)
+
+
+def row_to_item(header: list, row: list) -> dict:
+    """Build one record from a data row, leaving out DROPPED_COLUMNS.
+
+    zip() stops at the shorter sequence, so a short row yields fewer keys
+    instead of raising IndexError.
+    """
+    return {
+        column: convert_value(cell)
+        for column, cell in zip(header, row)
+        if column not in DROPPED_COLUMNS
+    }
+
+
+def main() -> None:
+    """Read INPUT_PATH, convert every row and write the result to OUTPUT_PATH."""
+    # newline="" lets the csv module handle newlines embedded in quoted fields.
+    with open(INPUT_PATH, encoding="utf-8", newline="") as csv_file:
+        reader = csv.reader(csv_file, quoting=csv.QUOTE_ALL, doublequote=True, escapechar="\\")
+        header = next(reader, [])  # an empty file has no header and no rows
+        # Blank lines come back as empty rows; they carry no record.
+        json_data = [row_to_item(header, row) for row in reader if row]
+
+    with open(OUTPUT_PATH, "w", encoding="utf-8") as json_file:
+        json.dump(json_data, json_file, indent=4, ensure_ascii=False)
+
+    print(f"Successfully converted CSV data to JSON format with {len(json_data)} records")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py
@@ -71,11 +71,18 @@ class RetrievalResponseDelta(BaseModel):
 
 class ItemPublic(BaseModel):
     id: int
-    type: str
-    brand: str
     name: str
+    location: str
+    cuisine: str
+    rating: int
+    price_level: int
+    review_count: int
+    hours: str  # text, same type as the Item.hours column (Mapped[str])
+    tags: list[str]  # list of strings, same type as the Item.tags ARRAY(VARCHAR) column
     description: str
-    price: float
+    menu_summary: str
+    top_reviews: str
+    vibe: str
 
 
 class ItemWithDistance(ItemPublic):
diff --git a/src/backend/fastapi_app/postgres_models.py b/src/backend/fastapi_app/postgres_models.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 from pgvector.sqlalchemy import Vector
-from sqlalchemy import Index
+from sqlalchemy import VARCHAR, Index
+from sqlalchemy.dialects import postgresql
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
 
 
@@ -13,11 +14,19 @@ class Base(DeclarativeBase):
 class Item(Base):
     __tablename__ = "items"
     id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
-    type: Mapped[str] = mapped_column()
-    brand: Mapped[str] = mapped_column()
     name: Mapped[str] = mapped_column()
+    location: Mapped[str] = mapped_column()
+    cuisine: Mapped[str] = mapped_column()
+    rating: Mapped[int] = mapped_column()
+    price_level: Mapped[int] = mapped_column()
+    review_count: Mapped[int] = mapped_column()
+    hours: Mapped[str] = mapped_column()
+    tags: Mapped[list[str]] = mapped_column(postgresql.ARRAY(VARCHAR))  # Array of strings
     description: Mapped[str] = mapped_column()
-    price: Mapped[float] = mapped_column()
+    menu_summary: Mapped[str] = mapped_column()
+    top_reviews: Mapped[str] = mapped_column()
+    vibe: Mapped[str] = mapped_column()
+
     # Embeddings for different models:
     embedding_3l: Mapped[Vector] = mapped_column(Vector(1024), nullable=True)  # text-embedding-3-large
     embedding_nomic: Mapped[Vector] = mapped_column(Vector(768), nullable=True)  # nomic-embed-text
@@ -33,10 +42,10 @@ def to_dict(self, include_embedding: bool = False):
         return model_dict
 
     def to_str_for_rag(self):
-        return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
+        return f"Name:{self.name} Description:{self.description} Location:{self.location} Cuisine:{self.cuisine} Rating:{self.rating} Price Level:{self.price_level} Review Count:{self.review_count} Hours:{self.hours} Tags:{self.tags} Menu Summary:{self.menu_summary} Top Reviews:{self.top_reviews} Vibe:{self.vibe}"  # noqa: E501
 
     def to_str_for_embedding(self):
-        return f"Name: {self.name} Description: {self.description} Type: {self.type}"
+        return f"Name: {self.name} Description: {self.description} Cuisine: {self.cuisine} Tags: {self.tags} Menu Summary: {self.menu_summary} Top Reviews: {self.top_reviews} Vibe: {self.vibe}"  # noqa: E501
 
 
 """
diff --git a/src/backend/fastapi_app/prompts/answer.txt b/src/backend/fastapi_app/prompts/answer.txt
@@ -1,9 +1,9 @@
-Assistant helps customers with questions about products.
-Respond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.
-Answer ONLY with the product details listed in the products.
+Assistant helps PyCon attendees with questions about restaurants.
+Respond as if you are a conference volunteer. Do NOT respond with tables.
+Answer ONLY with the restaurant details listed in the sources.
 If there isn't enough information below, say you don't know.
 Do not generate answers that don't use the sources below.
-Each product has an ID in brackets followed by colon and the product details.
-Always include the product ID for each product you use in the response.
-Use square brackets to reference the source, for example [52].
-Don't combine citations, list each product separately, for example [27][51].
+Each restaurant has an ID in brackets followed by colon and the restaurant details.
+Always include the restaurant ID for each restaurant you reference in the response.
+Use square brackets to reference the restaurant, for example [52].
+Don't combine references, cite each restaurant separately, for example [27][51].
diff --git a/src/backend/fastapi_app/prompts/query.txt b/src/backend/fastapi_app/prompts/query.txt
@@ -1,5 +1,5 @@
 Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.
-You have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.
+You have access to an Azure PostgreSQL database with an items table of restaurants that has columns for name, description, cuisine, menu summary, vibe, rating, price level, etc.
 Generate a search query based on the conversation and the new question.
 If the question is not in English, translate the question to English before generating the search query.
 If you cannot generate a search query, return the original user question.
diff --git a/src/backend/fastapi_app/prompts/query_fewshots.json b/src/backend/fastapi_app/prompts/query_fewshots.json
@@ -1,34 +1,34 @@
 [
-    {"role": "user", "content": "good options for climbing gear that can be used outside?"},
+    {"role": "user", "content": "good options for ethiopian restaurants?"},
     {"role": "assistant", "tool_calls": [
         {
             "id": "call_abc123",
             "type": "function",
             "function": {
-                "arguments": "{\"search_query\":\"climbing gear outside\"}",
+                "arguments": "{\"search_query\":\"ethiopian\"}",
                 "name": "search_database"
             }
         }
     ]},
     {
         "role": "tool",
         "tool_call_id": "call_abc123",
-        "content": "Search results for climbing gear that can be used outside: ..."
+        "content": "Search results for ethiopian: ..."
     },
-    {"role": "user", "content": "are there any shoes less than $50?"},
+    {"role": "user", "content": "are there any inexpensive chinese restaurants?"},
     {"role": "assistant", "tool_calls": [
         {
             "id": "call_abc456",
             "type": "function",
             "function": {
-                "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
+                "arguments": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",
                 "name": "search_database"
             }
         }
     ]},
     {
         "role": "tool",
         "tool_call_id": "call_abc456",
-        "content": "Search results for shoes cheaper than 50: ..."
+        "content": "Search results for chinese: ..."
     }
 ]
diff --git a/src/backend/fastapi_app/query_rewriter.py b/src/backend/fastapi_app/query_rewriter.py
@@ -12,39 +12,39 @@ def build_search_function() -> list[ChatCompletionToolParam]:
             "type": "function",
             "function": {
                 "name": "search_database",
-                "description": "Search PostgreSQL database for relevant products based on user query",
+                "description": "Search PostgreSQL database for relevant restaurants based on user query",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "search_query": {
                             "type": "string",
                             "description": "Query string to use for full text search, e.g. 'red shoes'",
                         },
-                        "price_filter": {
+                        "price_level_filter": {
                             "type": "object",
-                            "description": "Filter search results based on price of the product",
+                            "description": "Filter search results to a certain price level (from 1 $ to 4 $$$$, with 4 being most costly)",  # noqa: E501
                             "properties": {
                                 "comparison_operator": {
                                     "type": "string",
-                                    "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='",  # noqa
+                                    "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='",  # noqa: E501
                                 },
                                 "value": {
                                     "type": "number",
-                                    "description": "Value to compare against, e.g. 30",
+                                    "description": "Value to compare against, either 1, 2, 3, 4",
                                 },
                             },
                         },
-                        "brand_filter": {
+                        "rating_filter": {
                             "type": "object",
-                            "description": "Filter search results based on brand of the product",
+                            "description": "Filter search results based on ratings of restaurant (from 1 to 5 stars, with 5 the best)",  # noqa: E501
                             "properties": {
                                 "comparison_operator": {
                                     "type": "string",
-                                    "description": "Operator to compare the column value, either '=' or '!='",
+                                    "description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='",  # noqa: E501
                                 },
                                 "value": {
                                     "type": "string",
-                                    "description": "Value to compare against, e.g. AirStrider",
+                                    "description": "Value to compare against, either 0 1 2 3 4 5",
                                 },
                             },
                         },
@@ -69,22 +69,26 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp
                 arg = json.loads(function.arguments)
                 # Even though its required, search_query is not always specified
                 search_query = arg.get("search_query", original_user_query)
-                if "price_filter" in arg and arg["price_filter"] and isinstance(arg["price_filter"], dict):
-                    price_filter = arg["price_filter"]
+                if (
+                    "price_level_filter" in arg
+                    and arg["price_level_filter"]
+                    and isinstance(arg["price_level_filter"], dict)
+                ):
+                    price_level_filter = arg["price_level_filter"]
                     filters.append(
                         {
-                            "column": "price",
-                            "comparison_operator": price_filter["comparison_operator"],
-                            "value": price_filter["value"],
+                            "column": "price_level",
+                            "comparison_operator": price_level_filter["comparison_operator"],
+                            "value": price_level_filter["value"],
                         }
                     )
-                if "brand_filter" in arg and arg["brand_filter"] and isinstance(arg["brand_filter"], dict):
-                    brand_filter = arg["brand_filter"]
+                if "rating_filter" in arg and arg["rating_filter"] and isinstance(arg["rating_filter"], dict):
+                    rating_filter = arg["rating_filter"]
                     filters.append(
                         {
-                            "column": "brand",
-                            "comparison_operator": brand_filter["comparison_operator"],
-                            "value": brand_filter["value"],
+                            "column": "rating",
+                            "comparison_operator": rating_filter["comparison_operator"],
+                            "value": rating_filter["value"],
                         }
                     )
     elif query_text := response_message.content:
diff --git a/src/backend/fastapi_app/seed_data.json b/src/backend/fastapi_app/seed_data.json
diff --git a/src/frontend/src/components/Answer/Answer.tsx b/src/frontend/src/components/Answer/Answer.tsx
diff --git a/src/frontend/src/components/Example/ExampleList.tsx b/src/frontend/src/components/Example/ExampleList.tsx

Original file line number	Diff line number	Diff line change
`@@ -1,34 +1,34 @@`
`1`	`1`	`[`
`2`		`- {"role": "user", "content": "good options for climbing gear that can be used outside?"},`
	`2`	`+ {"role": "user", "content": "good options for ethiopian restaurants?"},`
`3`	`3`	`{"role": "assistant", "tool_calls": [`
`4`	`4`	`{`
`5`	`5`	`"id": "call_abc123",`
`6`	`6`	`"type": "function",`
`7`	`7`	`"function": {`
`8`		`- "arguments": "{\"search_query\":\"climbing gear outside\"}",`
	`8`	`+ "arguments": "{\"search_query\":\"ethiopian\"}",`
`9`	`9`	`"name": "search_database"`
`10`	`10`	`}`
`11`	`11`	`}`
`12`	`12`	`]},`
`13`	`13`	`{`
`14`	`14`	`"role": "tool",`
`15`	`15`	`"tool_call_id": "call_abc123",`
`16`		`- "content": "Search results for climbing gear that can be used outside: ..."`
	`16`	`+ "content": "Search results for ethiopian: ..."`
`17`	`17`	`},`
`18`		`- {"role": "user", "content": "are there any shoes less than $50?"},`
	`18`	`+ {"role": "user", "content": "are there any inexpensive chinese restaurants?"},`
`19`	`19`	`{"role": "assistant", "tool_calls": [`
`20`	`20`	`{`
`21`	`21`	`"id": "call_abc456",`
`22`	`22`	`"type": "function",`
`23`	`23`	`"function": {`
`24`		`- "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",`
	`24`	`+ "arguments": "{\"search_query\":\"chinese\",\"price_level_filter\":{\"comparison_operator\":\"<\",\"value\":3}}",`
`25`	`25`	`"name": "search_database"`
`26`	`26`	`}`
`27`	`27`	`}`
`28`	`28`	`]},`
`29`	`29`	`{`
`30`	`30`	`"role": "tool",`
`31`	`31`	`"tool_call_id": "call_abc456",`
`32`		`- "content": "Search results for shoes cheaper than 50: ..."`
	`32`	`+ "content": "Search results for chinese: ..."`
`33`	`33`	`}`
`34`	`34`	`]`