finalizer module refined, doc_load refined

melih-unsal · Sep 27, 2023 · f2f195b
1 parent 3d84306
commit f2f195b
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 89 deletions.
diff --git a/demogpt/chains/prompts/combine_v2.py b/demogpt/chains/prompts/combine_v2.py
@@ -1,22 +1,31 @@
 system_template = """
-Regenerate the code by only combining the input parts into st.form.
+Regenerate the code by combining all the user input parts into st.form.
 It is really important not to change other parts.
 Copy all the function definitions and library imports as is and don't modify or replace them.
 Combine input-related parts under the st.form.
 If a function needs an input from user via st.text_input, put it between st.form and st.form_submit_button so that the state is preserved.
-Show the result when the form is submitted.
+Show the result when the form is submitted under the if submit_button: statement.
 Keep in mind that don't miss any function definition.
 
 Don't forget to add those functions with their original definitions as is 
 
 {function_names}
+
+Always put "if submit_button:" inside of st.form block
 """
 
 human_template = """
 =============================================================
 DRAFT CODE 1:
 # all imports
 
+openai_api_key = st.sidebar.text_input(
+    "OpenAI API Key",
+    placeholder="sk-...",
+    value=os.getenv("OPENAI_API_KEY", ""),
+    type="password",
+)
+
 def foo1():
     result = "res"
     return result
@@ -43,27 +52,41 @@ def foo2(half_story,user_choice):
 FINAL CODE 1:
 # all imports
 
+# all functions
+
+openai_api_key = st.sidebar.text_input(
+    "OpenAI API Key",
+    placeholder="sk-...",
+    value=os.getenv("OPENAI_API_KEY", ""),
+    type="password",
+)
+
 def foo1():
     result = "res"
     return result
 
-half_story = foo1()
-
-if half_story:
-    st.write(half_story)
-
 def foo2(half_story,user_choice):
     result = half_story + user_choice
     return result
     
 with st.form(key='story_game'):
+    # take all user inputs
 	text_input = st.text_input(label='Enter some text')
     user_choice = st.selectbox("What would you like to do next?", ["Choice1", "Choice2"])
 	submit_button = st.form_submit_button(label='Submit Story')
+    # run functions if submit button is pressed
     if submit_button:
+        half_story = foo1()
+        if half_story:
+            st.write(half_story)
         if text_input and user_choice :
             continued_story = foo2(text_input,user_choice)
+        else:
+            continued_story = ""
+        if continued_story:
             st.markdown(continued_story)
+    else: # if not submitted yet, we need to initizalize continued_story to get rid of name error
+        continued_story = ""
 #############################################################
 
 

diff --git a/demogpt/chains/prompts/task_list/doc_load.py b/demogpt/chains/prompts/task_list/doc_load.py
@@ -1,64 +1,49 @@
 loaders = """
 For Local TXT file:
-from langchain.document_loaders import TextLoader
-loader = TextLoader(<local_txt_file_path>)
+TextLoader
 ################################
 For Web Page:
-from langchain.document_loaders import WebBaseLoader
-loader = WebBaseLoader("<url>")
+WebBaseLoader
 ################################
 For Online PDF:
-from langchain.document_loaders import OnlinePDFLoader
-loader = OnlinePDFLoader("<online_pdf_url>")
+OnlinePDFLoader
 ################################
 For Local PDF:
-from langchain.document_loaders import UnstructuredPDFLoader
-loader = UnstructuredPDFLoader(
-    <local_pdf_full_path>, mode="elements", strategy="fast"
-    )
+UnstructuredPDFLoader
 ################################
 For Power Point:
-from langchain.document_loaders import UnstructuredPowerPointLoader
-loader = UnstructuredPowerPointLoader(
-    <local_powerpoint_file>, mode="elements", strategy="fast"
-    )
+UnstructuredPowerPointLoader
 ################################
 For CSV:
-from langchain.document_loaders.csv_loader import UnstructuredCSVLoader
-loader = UnstructuredCSVLoader(<csv_file_path>, mode="elements")
+UnstructuredCSVLoader
 ################################
 For Excel:
-from langchain.document_loaders.excel import UnstructuredExcelLoader
-loader = UnstructuredExcelLoader(<excel_file_path>, mode="elements")
+UnstructuredExcelLoader
 """
 
+loader_dict = {  # input kind -> loader class name
+    "txt": "TextLoader",
+    "web_page": "WebBaseLoader",
+    "online_pdf": "OnlinePDFLoader",
+    "pdf": "UnstructuredPDFLoader",
+    "powerpoint": "UnstructuredPowerPointLoader",
+    "csv": "UnstructuredCSVLoader",
+    "excel": "UnstructuredExcelLoader",
+}
+
 system_template = f"""
-These are the Loader classes that you should select.
-Select the loader according to the input type unless the input type is ambiguous.
+Based on the provided context in 'Previous Code', choose the most appropriate loader.
+
+These are your loader options:
+
 {loaders}
 """
 
 human_template = """
-Write a loader function using langchain.document_loaders 
-to load the document for the argument name, variable and instruction 
-below like in the below format:
-
-###
-def {function_name}({argument}):
-    loader = Loader(path) # Select the appropriate Loader
-    docs = loader.load()
-    return docs
-
-if {argument}:
-    {variable} = {function_name}({argument})
-else:
-    {variable} = ''
-###
-    
-While using the loader, don't change "mode" and "strategy" arguments, they need to be constant as stated.
-If there are no such arguments, ignore it.
-
-Instruction:{instruction}    
+Use the information from 'Previous Code' to determine the loader from one of the 7 loader options.
+Don't write any explanation but directly say the loader option
 
-Document Loader Code:
+Instruction: {instruction}  
+Previous Code: {code_snippets}
+Loader Option:
 """
diff --git a/demogpt/chains/prompts/task_list/ui_input_file.py b/demogpt/chains/prompts/task_list/ui_input_file.py
@@ -16,6 +16,7 @@
 You will basically use file_uploader and get file path from it but nothing else.
 Do not loose the file path and check if the file is uploaded. Otherwise, assign empty string to "{variable}"
 Don't read the file, only get the file path
+In the st.file_uploader, change type parameter compatible with the type of the expected file such as pdf, csv, ...
 """
 
 human_template = """

diff --git a/demogpt/chains/task_chains.py b/demogpt/chains/task_chains.py
@@ -93,16 +93,15 @@ def pathToContent(cls, task, code_snippets):
         return utils.refine(code)
 
     @classmethod
-    def promptTemplate(cls, task, code_snippets):
+    def promptTemplate(cls, task):
         inputs = task["input_key"]
         instruction = task["description"]
 
         res = cls.getChain(
             system_template=prompts.prompt_template.system_template,
             human_template=prompts.prompt_template.human_template,
             instruction=instruction,
-            inputs=inputs,
-            code_snippets=code_snippets,
+            inputs=inputs
         )
         res = res[res.find("{") : res.rfind("}") + 1]
         return json.loads(res)
@@ -175,16 +174,34 @@ def docLoad(cls, task, code_snippets):
         variable = task["output_key"]
         function_name = task["task_name"]
 
-        code = cls.getChain(
+        loader = cls.getChain(
             system_template=prompts.doc_load.system_template,
             human_template=prompts.doc_load.human_template,
             instruction=instruction,
-            argument=argument,
-            variable=variable,
-            function_name=function_name,
             code_snippets=code_snippets,
         )
-        return utils.refine(code)
+
+
+        if loader in ["TextLoader", "WebBaseLoader", "OnlinePDFLoader"]:
+            loader_line = f'loader = {loader}({argument})'
+        elif loader in ["UnstructuredPDFLoader", "UnstructuredPowerPointLoader"]:
+            loader_line = f'loader = {loader}({argument}, mode="elements", strategy="fast")'
+        elif loader in ["UnstructuredCSVLoader", "UnstructuredExcelLoader"]:
+            loader_line = f'loader = {loader}({argument}, mode="elements")'
+        else:
+            loader_line = f'loader = TextLoader({argument})'
+
+        code = f"""
+def {function_name}({argument}):
+    {loader_line}
+    docs = loader.load()
+    return docs
+if {argument}:
+    {variable} = {function_name}({argument})
+else:
+    {variable} = ''
+        """
+        return code    
 
     @classmethod
     def stringToDoc(cls, task, code_snippets):

diff --git a/demogpt/controllers.py b/demogpt/controllers.py
@@ -49,7 +49,6 @@ def checkDTypes(tasks):
                     feedback += f"""
                     {name} expects all inputs as {reference_input} or none but the data type of {input_key} is {input_data_type} not {reference_input}. Please find another way.\n
                     """
-                    print("1:",)
             else:
                 for res, data_type in zip(input_key, input_data_type):
                     if data_type != reference_input:
@@ -75,10 +74,8 @@ def checkDTypes(tasks):
     return {"feedback": feedback, "valid": valid}
 
 
-def checkPromptTemplates(templates, task):
-    human_template = templates["template"]
-    system_template = templates["system_template"]
-    templates = human_template + system_template
+def checkPromptTemplates(templates, task, additional_inputs=[]):
+    templates = " ".join(list(templates.values()))
     inputs = task["input_key"]
     if inputs == "none":
         inputs = []
@@ -87,8 +84,9 @@ def checkPromptTemplates(templates, task):
             if inputs.startswith("["):
                 inputs = inputs[1:-1]
             inputs = [var.strip() for var in inputs.split(",")]
+    template_inputs =  inputs + additional_inputs
     feedback = ""
-    for input_key in inputs:
+    for input_key in template_inputs:
         if f"{{{input_key}}}" not in templates:
             feedback += f"'{{{input_key}}}' is not included in any of the templates. You must add '{{{input_key}}}' inside of at least one of the templates.\n"
 
@@ -97,7 +95,7 @@ def checkPromptTemplates(templates, task):
     matches = set(re.findall(r"\{([^}]+)\}", templates))
 
     for match in matches:
-        if match not in inputs:
+        if match not in template_inputs:
             feedback += f"'{{{match}}}' cannot be included nowhere in the templates. You must remove '{{{match}}}'.\n"
 
     valid = len(feedback) == 0

diff --git a/demogpt/utils.py b/demogpt/utils.py
@@ -21,7 +21,35 @@ def getFunctionNames(code):
     pattern = r"def (\w+)\(.*\):"
     return re.findall(pattern, code)
 
-
+def getGenericPromptTemplateCode(task, iters):
+    """Generate prompt/chat template code, refining it until it validates.
+
+    Builds the initial template with the chain matching ``task["task_type"]``,
+    then makes at most ``iters`` check/refine rounds before finalizing.
+    Returns whatever the matching finalizer returns for the last result.
+    """
+    task_type = task["task_type"]
+    is_prompt = task_type == "prompt_template"
+    prompt_func = TaskChains.promptTemplate if is_prompt else TaskChains.chat
+    finalizer_func = getPromptChatTemplateCode if is_prompt else getChatCode
+    # Chat tasks are validated against an extra "chat_history" input.
+    additional_inputs = ["chat_history"] if task_type == "chat" else []
+    res = prompt_func(task=task)
+    # Captured from the first draft and written back after refinement.
+    function_name = res.get("function_name")
+    variety = res.get("variety")
+    for _ in range(iters):
+        # Rebuild from the current result so refinements are re-validated
+        # instead of re-checking the stale first draft on every round.
+        templates = {key: res.get(key) for key in res if "template" in key}
+        check = checkPromptTemplates(templates, task, additional_inputs)
+        if check["valid"]:
+            break
+        res = TaskChains.promptTemplateRefiner(res, check["feedback"])
+    res["function_name"] = function_name
+    res["variety"] = variety
+    return finalizer_func(res, task)
+
 def getCodeSnippet(task, code_snippets, iters=10):
     task = refineKeyTypeCompatiblity(task)
     task_type = task["task_type"]
@@ -30,27 +58,8 @@ def getCodeSnippet(task, code_snippets, iters=10):
         code = TaskChains.uiInputText(task=task, code_snippets=code_snippets)
     elif task_type == "ui_output_text":
         code = TaskChains.uiOutputText(task=task, code_snippets=code_snippets)
-    elif task_type == "prompt_template":
-        res = ""
-        is_valid = False
-        res = TaskChains.promptTemplate(task=task, code_snippets=code_snippets)
-        function_name = res.get("function_name")
-        variety = res.get("variety")
-        index = 0
-        while not is_valid:
-            check = checkPromptTemplates(res, task)
-            is_valid = check["valid"]
-            feedback = check["feedback"]
-            if not is_valid:
-                res = TaskChains.promptTemplateRefiner(res, feedback)
-            else:
-                break
-            index += 1
-            if index == iters:
-                break
-        res["function_name"] = function_name
-        res["variety"] = variety
-        code = getPromptChatTemplateCode(res, task)
+    elif task_type in ["prompt_template", "chat"]:
+        code = getGenericPromptTemplateCode(task, iters=iters)
     elif task_type == "path_to_content":
         code = TaskChains.pathToContent(task=task, code_snippets=code_snippets)
     elif task_type == "doc_to_string":
@@ -63,9 +72,6 @@ def getCodeSnippet(task, code_snippets, iters=10):
         code = TaskChains.docLoad(task=task, code_snippets=code_snippets)
     elif task_type == "doc_summarizer":
         code = TaskChains.summarize(task=task, code_snippets=code_snippets)
-    elif task_type == "chat":
-        template = TaskChains.chat(task=task)
-        code = getChatCode(template=template, task=task)
     elif task_type == "ui_input_chat":
         code = getChatInputCode(TaskChains.uiInputChat(task=task))
     elif task_type == "ui_output_chat":

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "demogpt"
-version = "1.2.6.3"
+version = "1.2.6.4"
 description = "Auto Gen-AI App Generator with the Power of Llama 2"
 authors = ["Melih Unsal <[email protected]>"]
 license = "MIT"