From 6ffd616280d99e016bb62ef7b1d0bc4c29cbc3ab Mon Sep 17 00:00:00 2001
From: pramitchoudhary
Date: Thu, 18 May 2023 15:18:11 -0700
Subject: [PATCH] Further adjust the prompts #1

---
 sidekick/configs/prompt_template.py | 44 ++++++++++++++---------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/sidekick/configs/prompt_template.py b/sidekick/configs/prompt_template.py
index 8fc74c3..ef3dd74 100644
--- a/sidekick/configs/prompt_template.py
+++ b/sidekick/configs/prompt_template.py
@@ -1,16 +1,16 @@
 TASK_PROMPT = {
     "system_prompt": "Act as a Data Analyst",
     "user_prompt": """
-        ### For Table: {_table_name} Given an input *Question*, only return specific and informative tasks as an ordered itemized list for SQL generation that answer the question.
-        Extract all of the proper nouns (generally capitalized, abbreviated) from the Samples section and add to Context section as Key, Value pair.
-        Use the Context section and Samples section to establish relationship when tokens from Question does not match column names.
-        If information is not found in Context or Samples section, attempt to reason for possible tasks but also ask questions for.
-        Infer the return type of the Question. Do not generate final SQL response, only return tasks.
-        # Data information: \n{_data_info}
-        # Samples: \n{_sample_queries}
-        # Context: {_context}
-        # *Question*: {_question_str};
-        # Output: Tasks: ordered list of tasks
+        ### Given an input *Question*, only return specific and informative tasks as an ordered numeric list for SQL generation that answer the question.
+        Use the *History* and *Context* section for co-reference and to infer relationships.
+        If the words in the *Question* do not match column names *Data* section; Search for them in *Context* section.
+        Always use *Context* with highest similarity score with the *Question*.
+        If no information related to the *Question* is found; attempt to predict and reason for possible tasks.
+        Infer the return type of the Question. Do not generate final complete SQL response, only return tasks.
+        # *Data:* \nFor table {_table_name} schema info is mentioned below,\n{_data_info}
+        # *History*: \n{_sample_queries}
+        # *Question*: For Table {_table_name}, {_question_str}, *Context*: {_context}
+        # Output: Tasks: ordered numeric list of tasks
     """,
 }
 
@@ -21,23 +21,21 @@
 # Reference: https://arxiv.org/pdf/2005.14165.pdf
 QUERY_PROMPT = """
 ### System: Act as a SQL Expert
-# Given an input *Question*, only generate syntactically correct SQL queries using step by step reasoning from Tasks section.
-# Extract all of the proper nouns (generally capitalized, abbreviated) from the Examples section and add to Context section as Key, Value pair.
-# Use the context section to establish relationship when tokens from Question does not match column names.
+# Given an input *Question*, only generate syntactically correct SQL queries using step by step reasoning from *Tasks* section.
 # Pick the SQL query which has the highest average log probability of explaining the
-candidate question.
+candidate *Question*.
 ### {dialect} SQL tables
-Examples:\n{_sample_queries}
-### *Question*: {_question};
+### *History*:\n{_sample_queries}
+### *Question*: {_question}
 # SELECT 1
-### Tasks:\n{_tasks}
-### Context: {_context}
-### Suggestions:
-# Don't use aggregate and window function together;
-# Avoid COUNT(*) and prefer COUNT(1);
+### *Tasks*:\n{_tasks}
+### *Policies for SQL generation*:
+# Avoid overly complex SQL queries
+# Don't use aggregate and window function together
+# Use COUNT(1) instead of COUNT(*)
 # Return with LIMIT 100
-# Prefer NOT EXISTS to LEFT JOIN ON null id;
-# Avoid using the WITH statement;
+# Prefer NOT EXISTS to LEFT JOIN ON null id
+# Avoid using the WITH statement
 # When using DESC keep NULLs at the end
 # If JSONB format found in Table schema, do pattern matching on keywords from the question
 # Add explanation
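For reviewers, a minimal sketch of how the revised templates might be rendered after this patch. The import path follows the file touched here; the table name, schema snippet, history string, context mapping, question, and task list are made-up placeholders for illustration, not values from this repository, and the real call sites elsewhere in sidekick are not part of this change.

```python
# Hypothetical usage sketch: fill the updated TASK_PROMPT / QUERY_PROMPT templates.
# All concrete values below are invented placeholders.
from sidekick.configs.prompt_template import QUERY_PROMPT, TASK_PROMPT

question = "What is the average sleep duration per stress level?"  # hypothetical question

# Planning prompt: asks the model for an ordered numeric list of tasks, not SQL.
task_prompt = TASK_PROMPT["user_prompt"].format(
    _table_name="sleep_health",                                      # hypothetical table
    _data_info="sleep_duration FLOAT, stress_level INTEGER",         # hypothetical schema info
    _sample_queries="SELECT AVG(sleep_duration) FROM sleep_health",  # hypothetical history
    _context="{'Sleep Duration': 'sleep_duration'}",                  # hypothetical context pairs
    _question_str=question,
)

# Generation prompt: turns the planned tasks into a single SQL query under the policies.
query_prompt = QUERY_PROMPT.format(
    dialect="postgres",
    _sample_queries="SELECT AVG(sleep_duration) FROM sleep_health",
    _question=question,
    _tasks="1. Group rows by stress_level\n2. Compute AVG(sleep_duration) per group",
)

print(task_prompt)
print(query_prompt)
```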