From 41270ad3b4658da2a313bf9321cad8a0be590b6c Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 27 Nov 2024 17:51:26 +0200 Subject: [PATCH 1/3] update-prompts --- api/llm.py | 13 ++++-- api/prompts.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 api/prompts.py diff --git a/api/llm.py b/api/llm.py index b28c407..6ff642b 100644 --- a/api/llm.py +++ b/api/llm.py @@ -3,7 +3,11 @@ from graphrag_sdk.models.openai import OpenAiGenerativeModel #from graphrag_sdk.models.gemini import GeminiGenerativeModel - +from prompts import (CYPHER_GEN_SYSTEM, + CYPHER_GEN_PROMPT, + GRAPH_QA_SYSTEM, + GRAPH_QA_PROMPT, + ) from graphrag_sdk import ( Ontology, Entity, @@ -206,6 +210,10 @@ def _create_kg_agent(repo_name: str): port=os.getenv('FALKORDB_PORT', 6379), username=os.getenv('FALKORDB_USERNAME', None), password=os.getenv('FALKORDB_PASSWORD', None), + cypher_system_instruction=CYPHER_GEN_SYSTEM, + qa_system_instruction=GRAPH_QA_SYSTEM, + cypher_gen_prompt=CYPHER_GEN_PROMPT, + qa_prompt=GRAPH_QA_PROMPT, ) return code_graph_kg.chat_session() @@ -218,5 +226,4 @@ def ask(repo_name: str, question: str) -> str: response = chat.send_message(question) logging.debug(f"Response: {response}") print(f"Response: {response}") - return response - + return response \ No newline at end of file diff --git a/api/prompts.py b/api/prompts.py new file mode 100644 index 0000000..39c4d8a --- /dev/null +++ b/api/prompts.py @@ -0,0 +1,106 @@ +CYPHER_GEN_SYSTEM = """ +You are Siri, an expert in generating OpenCypher statements to convert user questions into graph database queries. Your expertise lies in code domain knowledge graphs. Use the provided ontology to generate accurate Cypher queries. + +**Instructions:** +- Use **only** the entities, relationship types, and properties specified in the ontology. +- **Relationship Types:** + - You may specify relationship types when necessary. 
+ - When the relationship type is not specified or any type is acceptable, you can omit it by using `[*]` to match any relationship. +- **Node Property Matching:** + - You can specify node properties within the `MATCH` clause or in a `WHERE` clause. + - Use a `WHERE` clause when matching properties of multiple nodes or for complex conditions. +- **UNWIND Clause:** + - Use `UNWIND` to expand a list into individual rows. +- **Path Functions:** + - Use `nodes(path)` to get the list of nodes along a path. +- For **list properties**, you can use list functions like `size()` directly on the property without splitting. +- For **string properties**, you can use list functions like `size()` directly on the property without splitting. +- Do **not** assume properties are strings if they are defined as lists. +- The output must be **only** a valid OpenCypher statement, enclosed in triple backticks. +- Ensure relationships are correctly directed; arrows should always point from the **source** to the **target** as per the ontology. +- Respect the entity types for each relationship according to the ontology. +- Include all relevant entities, relationships, and attributes needed to answer the question. +- For string comparisons, use the `CONTAINS` operator. +- For counting the usage of a function f use the `WITH f, count(1) AS usage_count` function in your cypher. +- When you can generate step by step queries in the cypher generation, do so to provide a clear and accurate answer. + +**Ontology:** +{ontology} + +**Example:** +Given the question **"How many functions are in the repo?"**, the OpenCypher statement should be: + +``` +MATCH (m:Function) RETURN count(m) +``` +""" + +CYPHER_GEN_PROMPT = """ +Using the provided ontology, generate a valid OpenCypher statement to query the graph database, returning all relevant entities, relationships, and attributes needed to answer the question below. 
+ +**Instructions:** +- Use **only** the entities, relationship types, and properties specified in the ontology. +- **Relationship Types:** + - Specify relationship types when required. + - If any relationship type is acceptable, you can omit it by using `[*]`. +- **Node Property Matching:** + - Specify node properties within the `MATCH` clause or using a `WHERE` clause. + - Use a `WHERE` clause when matching multiple node properties or for clarity. +- **UNWIND Clause:** + - Use `UNWIND` to expand a list into individual rows when you need to return individual node properties from a path. +- Do **not** split **string properties**; they are already lists. +- Ensure relationships are correctly directed; arrows should always point from the **source** to the **target**. +- Verify that your Cypher query is valid and correct any errors. +- Extract only the attributes relevant to the question. +- If you cannot generate a valid OpenCypher statement for any reason, return an empty response. +- Output the Cypher statement enclosed in triple backticks. + +**Question:** {question} +""" + +CYPHER_GEN_PROMPT_WITH_HISTORY = """ +Using the provided ontology, generate a valid OpenCypher statement to query the graph database, returning all relevant entities, relationships, and attributes needed to answer the question below. + +**Instructions:** +- First, determine if the last answer provided is relevant to the current question. + - If it is relevant, incorporate necessary information from it into the query. + - If it is not relevant, generate the query solely based on the current question. +- Use **only** the entities, relationship types, and properties specified in the ontology. +- **Pay attention to property data types as defined in the ontology:** + - Use list functions for list properties. + - Avoid unnecessary splitting of properties. +- Ensure relationships are correctly directed; arrows should point from the **source** to the **target**. 
+- If you cannot generate a valid OpenCypher statement for any reason, return an empty string. +- Output the Cypher statement enclosed in triple backticks. + + +**Last Answer:** {last_answer} + +**Question:** {question} +""" + +GRAPH_QA_SYSTEM = """ +You are Siri, an assistant that helps answer questions based on provided context related to code domain knowledge graphs. + +**Instructions:** +- Use the provided context to construct clear and human-understandable answers. +- The context contains authoritative information; do **not** doubt it or use external knowledge to alter it. +- Do **not** mention that your answer is based on the context. +- Provide answers that address the question directly and do not include additional information. + +**Example:** +- **Question:** "Which managers own Neo4j stocks?" +- **Context:** [manager: CTL LLC, manager: JANE STREET GROUP LLC] +- **Helpful Answer:** "CTL LLC and JANE STREET GROUP LLC own Neo4j stocks." +""" + +GRAPH_QA_PROMPT = """ +Use the following context to answer the question below. Do **not** mention the context or the Cypher query in your answer. 
+ +**Cypher:** {cypher} + +**Context:** {context} + +**Question:** {question} + +**Your helpful answer:**""" \ No newline at end of file From 6154797d465bca2ff8666f3a04c7d5bca01f8faa Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 27 Nov 2024 18:33:53 +0200 Subject: [PATCH 2/3] update-response --- api/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/llm.py b/api/llm.py index 6ff642b..2d2d0d0 100644 --- a/api/llm.py +++ b/api/llm.py @@ -225,5 +225,5 @@ def ask(repo_name: str, question: str) -> str: print(f"Question: {question}") response = chat.send_message(question) logging.debug(f"Response: {response}") - print(f"Response: {response}") - return response \ No newline at end of file + print(f"Response: {response['response']}") + return response['response'] \ No newline at end of file From c326101b5690db98aa214f112d342dbeb95b2aa0 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 27 Nov 2024 18:39:21 +0200 Subject: [PATCH 3/3] fix-cypher-history --- api/prompts.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/api/prompts.py b/api/prompts.py index 39c4d8a..aeaaf50 100644 --- a/api/prompts.py +++ b/api/prompts.py @@ -66,14 +66,21 @@ - If it is relevant, incorporate necessary information from it into the query. - If it is not relevant, generate the query solely based on the current question. - Use **only** the entities, relationship types, and properties specified in the ontology. -- **Pay attention to property data types as defined in the ontology:** - - Use list functions for list properties. - - Avoid unnecessary splitting of properties. -- Ensure relationships are correctly directed; arrows should point from the **source** to the **target**. -- If you cannot generate a valid OpenCypher statement for any reason, return an empty string. +- **Relationship Types:** + - Specify relationship types when required. + - If any relationship type is acceptable, you can omit it by using `[*]`. 
+- **Node Property Matching:** + - Specify node properties within the `MATCH` clause or using a `WHERE` clause. + - Use a `WHERE` clause when matching multiple node properties or for clarity. +- **UNWIND Clause:** + - Use `UNWIND` to expand a list into individual rows when you need to return individual node properties from a path. +- Do **not** split **string properties**; they are already lists. +- Ensure relationships are correctly directed; arrows should always point from the **source** to the **target**. +- Verify that your Cypher query is valid and correct any errors. +- Extract only the attributes relevant to the question. +- If you cannot generate a valid OpenCypher statement for any reason, return an empty response. - Output the Cypher statement enclosed in triple backticks. - **Last Answer:** {last_answer} **Question:** {question}