Skip to content

Commit

Permalink
finalizer module refined, doc_load refined
Browse files Browse the repository at this point in the history
  • Loading branch information
melih-unsal committed Sep 27, 2023
1 parent 3d84306 commit f2f195b
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 89 deletions.
37 changes: 30 additions & 7 deletions demogpt/chains/prompts/combine_v2.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
system_template = """
Regenerate the code by only combining the input parts into st.form.
Regenerate the code by combining all the user input parts into st.form.
It is really important not to change other parts.
Copy all the function definitions and library imports as is and don't modify or replace them.
Combine input-related parts under the st.form.
If a function needs an input from user via st.text_input, put it between st.form and st.form_submit_button so that the state is preserved.
Show the result when the form is submitted.
Show the result when the form is submitted under the if submit_button: statement.
Keep in mind that don't miss any function definition.
Don't forget to add those functions with their original definitions as is
{function_names}
Always put "if submit_button:" inside of st.form block
"""

human_template = """
=============================================================
DRAFT CODE 1:
# all imports
openai_api_key = st.sidebar.text_input(
"OpenAI API Key",
placeholder="sk-...",
value=os.getenv("OPENAI_API_KEY", ""),
type="password",
)
def foo1():
result = "res"
return result
Expand All @@ -43,27 +52,41 @@ def foo2(half_story,user_choice):
FINAL CODE 1:
# all imports
# all functions
openai_api_key = st.sidebar.text_input(
"OpenAI API Key",
placeholder="sk-...",
value=os.getenv("OPENAI_API_KEY", ""),
type="password",
)
def foo1():
result = "res"
return result
half_story = foo1()
if half_story:
st.write(half_story)
def foo2(half_story,user_choice):
result = half_story + user_choice
return result
with st.form(key='story_game'):
# take all user inputs
text_input = st.text_input(label='Enter some text')
user_choice = st.selectbox("What would you like to do next?", ["Choice1", "Choice2"])
submit_button = st.form_submit_button(label='Submit Story')
# run functions if submit button is pressed
if submit_button:
half_story = foo1()
if half_story:
st.write(half_story)
if text_input and user_choice :
continued_story = foo2(text_input,user_choice)
else:
continued_story = ""
if continued_story:
st.markdown(continued_story)
else: # if not submitted yet, we need to initizalize continued_story to get rid of name error
continued_story = ""
#############################################################
Expand Down
67 changes: 26 additions & 41 deletions demogpt/chains/prompts/task_list/doc_load.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,49 @@
loaders = """
For Local TXT file:
from langchain.document_loaders import TextLoader
loader = TextLoader(<local_txt_file_path>)
TextLoader
################################
For Web Page:
from langchain.document_loaders import WebBaseLoader
loader = WebBaseLoader("<url>")
WebBaseLoader
################################
For Online PDF:
from langchain.document_loaders import OnlinePDFLoader
loader = OnlinePDFLoader("<online_pdf_url>")
OnlinePDFLoader
################################
For Local PDF:
from langchain.document_loaders import UnstructuredPDFLoader
loader = UnstructuredPDFLoader(
<local_pdf_full_path>, mode="elements", strategy="fast"
)
UnstructuredPDFLoader
################################
For Power Point:
from langchain.document_loaders import UnstructuredPowerPointLoader
loader = UnstructuredPowerPointLoader(
<local_powerpoint_file>, mode="elements", strategy="fast"
)
UnstructuredPowerPointLoader
################################
For CSV:
from langchain.document_loaders.csv_loader import UnstructuredCSVLoader
loader = UnstructuredCSVLoader(<csv_file_path>, mode="elements")
UnstructuredCSVLoader
################################
For Excel:
from langchain.document_loaders.excel import UnstructuredExcelLoader
loader = UnstructuredExcelLoader(<excel_file_path>, mode="elements")
UnstructuredExcelLoader
"""

# Maps a short input-type key (e.g. "txt", "pdf") to the name of the
# langchain document loader class for that kind of input. The values are
# the same class names that are listed in the `loaders` prompt text above.
# NOTE(review): no consumer of this dict is visible in this view — confirm
# where it is looked up before renaming any key.
loader_dict = {
"txt" : "TextLoader",
"web_page" : "WebBaseLoader",
"online_pdf" : "OnlinePDFLoader",
"pdf" :"UnstructuredPDFLoader",
"powerpoint" : "UnstructuredPowerPointLoader",
"csv" : "UnstructuredCSVLoader",
"excel" :"UnstructuredExcelLoader"
}

system_template = f"""
These are the Loader classes that you should select.
Select the loader according to the input type unless the input type is ambiguous.
Based on the provided context in 'Previous Code', choose the most appropriate loader.
These are your loader options:
{loaders}
"""

human_template = """
Write a loader function using langchain.document_loaders
to load the document for the argument name, variable and instruction
below like in the below format:
###
def {function_name}({argument}):
loader = Loader(path) # Select the appropriate Loader
docs = loader.load()
return docs
if {argument}:
{variable} = {function_name}({argument})
else:
{variable} = ''
###
While using the loader, don't change "mode" and "strategy" arguments, they need to be constant as stated.
If there are no such arguments, ignore it.
Instruction:{instruction}
Use the information from 'Previous Code' to determine the loader from one of the 7 loader options.
Don't write any explanation but directly say the loader option
Document Loader Code:
Instruction: {instruction}
Previous Code: {code_snippets}
Loader Option:
"""
1 change: 1 addition & 0 deletions demogpt/chains/prompts/task_list/ui_input_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
You will basically use file_uploader and get file path from it but nothing else.
Do not loose the file path and check if the file is uploaded. Otherwise, assign empty string to "{variable}"
Don't read the file, only get the file path
In the st.file_uploader, change type parameter compatible with the type of the expected file such as pdf, csv, ...
"""

human_template = """
Expand Down
33 changes: 25 additions & 8 deletions demogpt/chains/task_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,15 @@ def pathToContent(cls, task, code_snippets):
return utils.refine(code)

@classmethod
def promptTemplate(cls, task, code_snippets):
def promptTemplate(cls, task):
inputs = task["input_key"]
instruction = task["description"]

res = cls.getChain(
system_template=prompts.prompt_template.system_template,
human_template=prompts.prompt_template.human_template,
instruction=instruction,
inputs=inputs,
code_snippets=code_snippets,
inputs=inputs
)
res = res[res.find("{") : res.rfind("}") + 1]
return json.loads(res)
Expand Down Expand Up @@ -175,16 +174,34 @@ def docLoad(cls, task, code_snippets):
variable = task["output_key"]
function_name = task["task_name"]

code = cls.getChain(
loader = cls.getChain(
system_template=prompts.doc_load.system_template,
human_template=prompts.doc_load.human_template,
instruction=instruction,
argument=argument,
variable=variable,
function_name=function_name,
code_snippets=code_snippets,
)
return utils.refine(code)


if loader in ["TextLoader", "WebBaseLoader", "OnlinePDFLoader"]:
loader_line = f'loader = {loader}({argument})'
elif loader in ["UnstructuredPDFLoader", "UnstructuredPowerPointLoader"]:
loader_line = f'loader = {loader}({argument}, mode="elements", strategy="fast")'
elif loader in ["UnstructuredCSVLoader", "UnstructuredExcelLoader"]:
loader_line = f'loader = {loader}({argument}, mode="elements")'
else:
loader_line = f'loader = TextLoader({argument})'

code = f"""
def {function_name}({argument}):
{loader_line}
docs = loader.load()
return docs
if {argument}:
{variable} = {function_name}({argument})
else:
{variable} = ''
"""
return code

@classmethod
def stringToDoc(cls, task, code_snippets):
Expand Down
12 changes: 5 additions & 7 deletions demogpt/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def checkDTypes(tasks):
feedback += f"""
{name} expects all inputs as {reference_input} or none but the data type of {input_key} is {input_data_type} not {reference_input}. Please find another way.\n
"""
print("1:",)
else:
for res, data_type in zip(input_key, input_data_type):
if data_type != reference_input:
Expand All @@ -75,10 +74,8 @@ def checkDTypes(tasks):
return {"feedback": feedback, "valid": valid}


def checkPromptTemplates(templates, task):
human_template = templates["template"]
system_template = templates["system_template"]
templates = human_template + system_template
def checkPromptTemplates(templates, task, additional_inputs=[]):
templates = " ".join(list(templates.values()))
inputs = task["input_key"]
if inputs == "none":
inputs = []
Expand All @@ -87,8 +84,9 @@ def checkPromptTemplates(templates, task):
if inputs.startswith("["):
inputs = inputs[1:-1]
inputs = [var.strip() for var in inputs.split(",")]
template_inputs = inputs + additional_inputs
feedback = ""
for input_key in inputs:
for input_key in template_inputs:
if f"{{{input_key}}}" not in templates:
feedback += f"'{{{input_key}}}' is not included in any of the templates. You must add '{{{input_key}}}' inside of at least one of the templates.\n"

Expand All @@ -97,7 +95,7 @@ def checkPromptTemplates(templates, task):
matches = set(re.findall(r"\{([^}]+)\}", templates))

for match in matches:
if match not in inputs:
if match not in template_inputs:
feedback += f"'{{{match}}}' cannot be included nowhere in the templates. You must remove '{{{match}}}'.\n"

valid = len(feedback) == 0
Expand Down
56 changes: 31 additions & 25 deletions demogpt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,35 @@ def getFunctionNames(code):
pattern = r"def (\w+)\(.*\):"
return re.findall(pattern, code)


def getGenericPromptTemplateCode(task, iters):
    """Build the final code for a "prompt_template" or "chat" task.

    The prompt templates are first generated by the matching TaskChains
    method, then validated with ``checkPromptTemplates``.  While validation
    fails, the result is sent to ``TaskChains.promptTemplateRefiner``
    together with the validation feedback, and the *refined* templates are
    validated again, for at most ``iters`` refinement rounds.

    Args:
        task: Task dictionary.  ``task["task_type"]`` selects the
            generator/finalizer pair; the whole dict is forwarded to the
            generator, the checker and the finalizer.
        iters: Maximum number of refinement rounds.  Values below 1 behave
            like 1 (a single check followed by at most one refinement).

    Returns:
        Whatever the selected finalizer (``getPromptChatTemplateCode`` for
        "prompt_template" tasks, ``getChatCode`` otherwise) returns for the
        last generated/refined result.
    """
    task_type = task["task_type"]
    is_prompt_template = task_type == "prompt_template"
    prompt_func = TaskChains.promptTemplate if is_prompt_template else TaskChains.chat
    finalizer_func = getPromptChatTemplateCode if is_prompt_template else getChatCode

    # Chat templates are additionally allowed (and required) to reference
    # the {chat_history} placeholder on top of the task inputs.
    additional_inputs = ["chat_history"] if task_type == "chat" else []

    res = prompt_func(task=task)

    # The refiner's output is not guaranteed to carry these two fields, so
    # remember them from the first generation and restore them at the end.
    function_name = res.get("function_name")
    variety = res.get("variety")

    index = 0
    while True:
        # Rebuild the template view from the *current* result on every
        # round; otherwise the refiner's output would never be validated
        # and the loop would keep re-checking the stale first draft.
        templates = {key: res.get(key) for key in res if "template" in key}
        check = checkPromptTemplates(templates, task, additional_inputs)
        if check["valid"]:
            break
        res = TaskChains.promptTemplateRefiner(res, check["feedback"])
        index += 1
        # ">=" (not "==") so that iters <= 0 cannot cause an endless loop.
        if index >= iters:
            break

    res["function_name"] = function_name
    res["variety"] = variety
    return finalizer_func(res, task)

def getCodeSnippet(task, code_snippets, iters=10):
task = refineKeyTypeCompatiblity(task)
task_type = task["task_type"]
Expand All @@ -30,27 +58,8 @@ def getCodeSnippet(task, code_snippets, iters=10):
code = TaskChains.uiInputText(task=task, code_snippets=code_snippets)
elif task_type == "ui_output_text":
code = TaskChains.uiOutputText(task=task, code_snippets=code_snippets)
elif task_type == "prompt_template":
res = ""
is_valid = False
res = TaskChains.promptTemplate(task=task, code_snippets=code_snippets)
function_name = res.get("function_name")
variety = res.get("variety")
index = 0
while not is_valid:
check = checkPromptTemplates(res, task)
is_valid = check["valid"]
feedback = check["feedback"]
if not is_valid:
res = TaskChains.promptTemplateRefiner(res, feedback)
else:
break
index += 1
if index == iters:
break
res["function_name"] = function_name
res["variety"] = variety
code = getPromptChatTemplateCode(res, task)
elif task_type in ["prompt_template", "chat"]:
code = getGenericPromptTemplateCode(task, iters=iters)
elif task_type == "path_to_content":
code = TaskChains.pathToContent(task=task, code_snippets=code_snippets)
elif task_type == "doc_to_string":
Expand All @@ -63,9 +72,6 @@ def getCodeSnippet(task, code_snippets, iters=10):
code = TaskChains.docLoad(task=task, code_snippets=code_snippets)
elif task_type == "doc_summarizer":
code = TaskChains.summarize(task=task, code_snippets=code_snippets)
elif task_type == "chat":
template = TaskChains.chat(task=task)
code = getChatCode(template=template, task=task)
elif task_type == "ui_input_chat":
code = getChatInputCode(TaskChains.uiInputChat(task=task))
elif task_type == "ui_output_chat":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "demogpt"
version = "1.2.6.3"
version = "1.2.6.4"
description = "Auto Gen-AI App Generator with the Power of Llama 2"
authors = ["Melih Unsal <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit f2f195b

Please sign in to comment.