-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloader.py
382 lines (313 loc) · 16.1 KB
/
loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
import os
from llama_hub.file.base import SimpleDirectoryReader
from llama_index import GPTVectorStoreIndex
import openai
from datetime import datetime, timedelta
from pydantic import BaseModel
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from langchain.agents import AgentExecutor, initialize_agent, Tool, AgentType
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA, LLMChain, MapReduceChain, create_tagging_chain, summarize
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chains.mapreduce import MapReduceChain
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.llm import LLMChain
from langchain.document_loaders import GoogleDriveLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import (
ChatPromptTemplate,
PromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
StringPromptTemplate
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
from langchain import PromptTemplate, OpenAI, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
#from conn_gpt import query_database, get_db_schema
from graph_gt import open_graph
from business_analysis import business_analysis
from langchain.prompts import MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.tools import BaseTool, StructuredTool, Tool, tool
from langchain import LLMMathChain, SerpAPIWrapper
from langchain.chains import PALChain
import json
import os
import json
from typing import List, Dict, Any
from pydantic import BaseModel, Field
# Respect a key that is already exported in the environment; only fall back to
# the empty placeholder when nothing is configured. Unconditionally assigning
# "" here would wipe out a real key supplied by the shell or deployment.
os.environ.setdefault("OPENAI_API_KEY", "")
openai.api_key = os.environ["OPENAI_API_KEY"]

# Chat model name shared by the ChatOpenAI instances created in this module.
GPT_MODEL = "gpt-3.5-turbo-0613"

# Load everything under ./data/documents (recursively, skipping hidden files)
# and build the vector index that backs the "documents" mode.
loader = SimpleDirectoryReader('./data/documents', recursive=True, exclude_hidden=True)
documents = loader.load_data()
index = GPTVectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Deterministic (temperature 0) chat model used for plain conversation.
llm_normal = ChatOpenAI(temperature=0, model_name=GPT_MODEL)
# Prompt used for plain conversation. It has two placeholders:
# {conversation_history} and {input}.
normal_chat_template = """
You are a personal assistant for AltheaSuite, a cloud-based ERP. Your goal is to assist the user in completing their tasks.
Please respond with a nice tone and maintain friendly conversation.
If the user asks for their past conversation history, respond based only on the history from {conversation_history}.
The user input is {input}.
"""
# Wrap the template so the chain below can fill in both placeholders.
prompt_for_normal_chat = PromptTemplate(
input_variables=["input", "conversation_history"],
template=normal_chat_template,
validate_template=False
)
# Chain that answers plain-conversation queries with the chat model.
normal_chat_chain = LLMChain(llm=llm_normal, prompt=prompt_for_normal_chat)
# Chat model handed to the math chain and to both tool-using agents below.
llm = ChatOpenAI(temperature=0, model_name=GPT_MODEL)
# Schema lookup is disabled together with the commented-out conn_gpt import.
#schema=get_db_schema()
def save_data_for_graph(data):
    """Write two-column ``x,y`` text to ``graph_data.csv`` for graph generation.

    Args:
        data: Newline-separated text. The first non-blank line holds the
            column names (only the first two are kept); every following
            non-blank line is one ``x,y`` data point.

    Returns:
        A confirmation message naming the file that was written.

    Raises:
        ValueError: If no data is supplied, the header has fewer than two
            columns, or a data row is not exactly two comma-separated values.
    """
    filename = 'graph_data.csv'

    # Blank lines (typically a trailing newline) are not data points; skipping
    # them avoids failing on otherwise valid input. strip() also drops "\r".
    rows = [line.strip() for line in data.split('\n') if line.strip()]
    if not rows:
        raise ValueError("No data supplied: expected a header line followed by 'x,y' rows.")

    column_names = rows[0].split(',')
    if len(column_names) < 2:
        raise ValueError(f"Header must name two columns, got {rows[0]!r}.")

    lines = [f"{column_names[0]},{column_names[1]}\n"]
    for row in rows[1:]:
        fields = row.split(',')
        if len(fields) != 2:
            raise ValueError(f"Expected a row in 'x,y' format, got {row!r}.")
        x, y = fields
        lines.append(f"{x},{y}\n")

    # Everything is validated before the file is opened, so malformed input
    # can no longer leave a truncated, half-written CSV behind.
    with open(filename, 'w') as f:
        f.writelines(lines)
    return f"Data saved to {filename} for graph generation."
def save_data_for_analysis(data):
    """Write comma-separated text to ``output_data.csv`` for later analysis.

    Args:
        data: Newline-separated text. The first non-blank line holds the
            column names; each following non-blank line is one record with
            any number of comma-separated values.

    Returns:
        A confirmation message naming the file that was written.
    """
    filename = 'output_data.csv'

    # Rows are already comma-separated, so they are written through as-is.
    # Blank lines (e.g. a trailing newline) are dropped so they do not become
    # empty records in the CSV; strip() also removes a stray "\r".
    rows = [line.strip() for line in data.split('\n') if line.strip()]

    with open(filename, 'w') as f:
        f.writelines(f"{row}\n" for row in rows)
    return f"Data saved to {filename} for analysis."
# Math chain exposed to the data agent through the "Calculator" tool below.
llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)
# Tools available to the data agent. The descriptions are part of the prompt
# sent to the model and steer the order in which the tools are called
# (save the data first, then plot or analyse it).
toolsdb = [
Tool(
name="OpenGraph",
func=open_graph,
description="The function to use second when you have to plot a graph. Use it after calling the \"SaveDataForGraph\" function. Your job is to figure out what the graph is supposed to be. Send a string saying either /'pie/', /'hist/' or '/box/', depending on whether the user asked for a pie graph, histogram graph or box plot graph respectively. If you're not able to figure out what kind of graph the user wants, ask them to clarify between the three graphs mentioned. You do not generate the graph, the function does, inform the user of the same",
),
# The database query tool is disabled together with the conn_gpt import.
# Tool(
# name="QueryDatabase",
# func=query_database,
# description=f"This function queries the MySQL database. Use the schema ({schema})",
# ),
Tool(
name="SaveDataForGraph",
func=save_data_for_graph,
description="The function to use first when asked to plot a graph. This function takes a string, where each line represents a data point in 'x,y' format. The first line should contain the column names. The function saves the data to a CSV file, which can then be used to generate a graph. If the input is already a string dont make an outer string.",
),
Tool(
name="SaveDataForAnalysis",
func=save_data_for_analysis,
description="The function to use first when asked to perform analysis. This function takes a string, where each line represents a data point. The first line should contain the column names. The function saves the data to a CSV file, which can then be used for analysis. This function can handle any number of columns. If the input is already a string dont make an outer string.",
),
Tool(
name="BusinessAnalysis",
func=business_analysis,
description="The function to use second when asked to perform analysis. This function takes a single string which is the filename. Use it to find useful outputs for user to understand their data. ALWAYS do your own analysis too."
),
Tool.from_function(
name="Calculator",
func=llm_math_chain.run,
description="Use whenever you need to perform math operations. This function takes a string with the calculation to be performed worded in natural language.",
)
]
# Adds a "memory" placeholder to the agent prompt; the name matches the
# memory_key of the ConversationBufferMemory created below.
agent_kwargs = {
"extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
}
memory = ConversationBufferMemory(memory_key="memory", return_messages=True)
# Agent used in "data" mode: OpenAI function calling over the tools above.
dbagent = initialize_agent(toolsdb, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True,agent_kwargs=agent_kwargs, memory=memory)
def format_query(query_type, input_text, conversation_history):
    """Build the input mapping passed to ``normal_chat_chain.run``.

    The chain's own prompt template fills in ``{input}`` and
    ``{conversation_history}``, so the raw values are returned here.
    Rendering the template into ``input`` beforehand would nest the whole
    prompt inside itself and repeat the history.

    Args:
        query_type: Mode chosen by the tagging chain. Unused; kept so existing
            callers keep working.
        input_text: The user's message.
        conversation_history: Sequence of conversation lines (strings).

    Returns:
        A dict with the space-joined history under ``'conversation_history'``
        and the user's message under ``'input'``.
    """
    return {
        'conversation_history': " ".join(conversation_history),
        'input': input_text,
    }
# Schema for the tagging chain: it labels each user query with a single
# "mode" value drawn from the enum below.
tagging_schema_main = {
"properties": {
"mode": {
"type": "string",
"enum": ["normal chat", "documents", "data", "tasks"],# "graph"],
"description": "Determines the mode of operation. 'normal chat' for regular conversation, 'documents' for document retrieval, 'data' for data based operations and 'tasks' for discussing tasks"
}
}
}
# NOTE(review): this rebinds llm_normal with the same arguments used earlier
# in the module; it looks redundant - confirm before removing.
llm_normal = ChatOpenAI(temperature=0, model_name=GPT_MODEL)
# Chain that tags a query with a mode according to the schema above.
tagging_chain = create_tagging_chain(tagging_schema_main, llm_normal)
def save_history():
    """Overwrite ``past_history.txt`` with the latest history summary."""
    # The summary is produced before the file is opened for writing, because
    # summarize_history() reads this same file.
    condensed = summarize_history()
    history_file = open('past_history.txt', 'w')
    try:
        history_file.write(condensed)
    finally:
        history_file.close()
def summarize_history():
    """Summarise the stored history together with the current conversation.

    Reads ``past_history.txt`` (treated as empty when the file does not exist
    yet), splits it into chunks, appends the module-level
    ``conversation_history`` entries and summarises the first three resulting
    texts with a "stuff" summarisation chain.

    Returns:
        The summary produced by the chain.
    """
    llm = OpenAI(temperature=0)
    text_splitter = CharacterTextSplitter()

    # save_history() is the only writer of past_history.txt and it calls this
    # function first, so on a fresh run the file is not there yet. Treat that
    # as "no previous history" instead of raising FileNotFoundError.
    try:
        with open("past_history.txt") as f:
            past_history = f.read()
    except FileNotFoundError:
        past_chunks = []
    else:
        past_chunks = text_splitter.split_text(past_history)

    texts = past_chunks + conversation_history
    # NOTE(review): only the first three texts are summarised, i.e. the stored
    # history plus the oldest entries of this session - confirm that the most
    # recent messages are not the ones intended here.
    docs = [Document(page_content=t) for t in texts[:3]]

    summary_template = """Write a summary of the input.
Ensure it doesnt contain summaries about the conversation but rather about the exact numbers discussed or the exact task discussed in one or two lines.
The input is: {text}. """
    PROMPT_summary = PromptTemplate(template=summary_template, input_variables=["text"])
    chain = load_summarize_chain(llm, chain_type="stuff", prompt=PROMPT_summary)
    summary = chain.run(docs)
    return summary
class Task:
    """A single entry of the task list.

    Attributes are plain values taken from the constructor arguments;
    ``is_completed`` starts as ``False`` and is set by :meth:`complete`.
    """

    def __init__(self, task_id, description, roles, assigned_to):
        """Store the task fields and mark the task as not completed."""
        self.task_id = task_id
        self.description = description
        self.roles = roles
        self.assigned_to = assigned_to
        # Initialised here so reading the flag before complete() is called
        # does not raise AttributeError.
        self.is_completed = False

    def complete(self):
        """Mark the task as completed."""
        self.is_completed = True
class Employee:
    """The person using the assistant.

    Holds the user's name, their role and the employee records handed to the
    constructor.
    """

    def __init__(self, name, role, employees):
        """Store the three constructor arguments unchanged."""
        self.name, self.role, self.employees = name, role, employees
def load_tasks_from_json(filename, employee_name, role, employees):
    """Load the tasks from *filename* that the given employee may see.

    A task is visible when any of the following holds:

    * it is assigned to ``employee_name``;
    * ``role`` is ``"manager"`` and the task is assigned to an employee whose
      ``'manager'`` field equals ``employee_name``;
    * ``role`` is ``"executive"`` and ``'executive'`` is not among the task's
      roles.

    Args:
        filename: Path of the JSON file holding a list of task records.
        employee_name: Name of the employee asking for tasks.
        role: That employee's role (may be ``None``).
        employees: Employee records (dicts with ``'name'`` and, optionally,
            ``'manager'``); may be ``None``.

    Returns:
        A dict mapping task id to :class:`Task` for every visible task.
    """
    with open(filename) as f:
        tasks_data = json.load(f)

    # Direct reports of this employee, computed once rather than per task.
    # .get() is used because not every record has a 'manager' entry.
    subordinates = set()
    if role == "manager":
        subordinates = {
            e['name']
            for e in (employees or [])
            if 'name' in e and e.get('manager') == employee_name
        }

    tasks = {}
    for task_data in tasks_data:
        assigned_to = task_data.get('assigned_to', '')
        task_role = task_data.get('roles', [])

        visible = (
            employee_name == assigned_to
            or (role == "manager" and assigned_to in subordinates)
            or (role == "executive" and 'executive' not in task_role)
        )
        if not visible:
            continue

        task = Task(
            task_id=task_data.get('id', ''),
            description=task_data.get('description', ''),
            roles=task_role,
            assigned_to=assigned_to,
        )
        tasks[task.task_id] = task
    return tasks
def load_employee_role_from_json(filename, employee_name):
    """Look up an employee's role in a JSON file of employee records.

    Args:
        filename: Path of the JSON file holding a list of employee records.
        employee_name: Name to search for in each record's ``'name'`` field.

    Returns:
        ``(role, all_records)`` for the first matching record, where ``role``
        is ``''`` if the record has no ``'role'`` entry; ``(None, None)`` when
        no record matches.
    """
    with open(filename) as source:
        roster = json.load(source)

    # First record whose name matches, or None when nobody matches.
    found = next(
        (entry for entry in roster
         if 'name' in entry and employee_name == entry['name']),
        None,
    )
    if found is None:
        return None, None
    return found.get('role', ''), roster
def add_task(task_description: str, assigned_to=None, roles=None):
    """Append a new task to ``tasks.json``.

    Args:
        task_description: Text of the new task.
        assigned_to: Name of the assignee; defaults to the module-level
            ``employee``'s name.
        roles: Role stored (wrapped in a one-element list) with the task;
            defaults to the module-level ``employee``'s role.

    Returns:
        A confirmation message containing the id given to the new task.
    """
    if assigned_to is None:
        assigned_to = employee.name
    if roles is None:
        roles = employee.role

    with open('tasks.json', 'r') as f:
        tasks_data = json.load(f)

    # The next id is derived from every task in the file, not only the ones
    # visible to the current employee; otherwise a new task could reuse the
    # id of a task belonging to somebody else. Non-numeric ids are ignored.
    numeric_ids = [
        int(task['id'])
        for task in tasks_data
        if str(task.get('id', '')).isdigit()
    ]
    task_id = str(max(numeric_ids, default=0) + 1)

    new_task = {"id": task_id, "description": task_description, "assigned_to": assigned_to, "roles": [roles]}
    tasks_data.append(new_task)
    with open('tasks.json', 'w') as f:
        json.dump(tasks_data, f)
    return f"Task with id {task_id} added successfully."
def delete_task(task_description: str):
    """Remove every task in ``tasks.json`` whose description matches exactly.

    Args:
        task_description: Description to match; the comparison is an exact
            string equality.

    Returns:
        A message saying whether a matching task was deleted or none was found.
    """
    with open('tasks.json', 'r') as f:
        tasks_data = json.load(f)

    # .get() keeps records without a 'description' instead of failing on them,
    # matching how load_tasks_from_json reads the same file.
    updated_tasks_data = [
        task for task in tasks_data
        if task.get('description') != task_description
    ]

    # Same length means nothing matched, so the file is left untouched.
    if len(tasks_data) == len(updated_tasks_data):
        return f"No task with the description '{task_description}' found."

    with open('tasks.json', 'w') as f:
        json.dump(updated_tasks_data, f)
    return f"Task with description '{task_description}' deleted successfully."
def list_tasks(str):
    """Return the tasks visible to the module-level ``employee`` as text.

    Args:
        str: Ignored; the function is registered as a tool, and the tool
            description asks the agent to send the user's query as input.

    Returns:
        One line per task (id, description and assignee), joined with
        newlines; an empty string when there are no visible tasks.
    """
    visible = load_tasks_from_json('tasks.json', employee.name, employee.role, employee.employees)
    lines = []
    for key, entry in visible.items():
        lines.append(f"Task ID: {key}, Description: {entry.description}, Assigned to: {entry.assigned_to}")
    return "\n".join(lines)
# Tools available to the task agent. The descriptions are part of the prompt
# sent to the model; they tell it to call ListTasks before adding or deleting.
# NOTE(review): AddTask's description mentions a comma-separated
# 'task_description,assigned_to,roles' input, but add_task does not split its
# first argument - confirm how such input is meant to be handled.
toolstask = [
Tool(
name="AddTask",
func=add_task,
description="Call this function ONLY after calling ListTasks to understand task structure. This function adds a new task to the task management system. Normally it takes only a string input of task_description. If assigned to and roles are mentioned in query then it takes a string input which should be formatted as 'task_description,assigned_to,roles'. Each field is separated by a comma."
),
Tool(
name="DeleteTask",
func=delete_task,
description="Call this function ONLY after calling ListTasks to look at the task descriptions. This function deletes a task from the task management system. It takes a string input which should be the EXACT MATCHING task description of the task to be deleted."
),
Tool(
name="ListTasks",
func=list_tasks,
description="This function retrieves a list of all tasks from the task management system. It takes a single string as input which is the exact query made by the user. DO NOT FORGET TO SEND THE INPUT"
)
]
# Adds a "memory" placeholder to the agent prompt; the name matches the
# memory_key of the ConversationBufferMemory created below.
agent_kwargs_tasks = {
"extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
}
memory_tasks = ConversationBufferMemory(memory_key="memory", return_messages=True)
# Agent used in "tasks" mode: OpenAI function calling over the tools above.
taskagent = initialize_agent(toolstask, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True,agent_kwargs=agent_kwargs_tasks, memory=memory_tasks)
# Conversation state shared across get_answer() calls: the active mode
# ("chat" until a specialised mode is selected) and the running transcript.
current_mode = "chat"
conversation_history = []
def get_answer(query, employee):
    """Answer one user query and update the module-level conversation state.

    In "chat" mode the query is first tagged with a mode. If the tag is
    "documents", "data" or "tasks", that mode becomes the current mode and the
    query is answered by its handler; otherwise the normal chat chain answers.
    Once a specialised mode is active, every query goes straight to its
    handler until the user types "exit", which switches back to "chat".

    Each call appends the query and the answer to ``conversation_history`` and
    then calls ``save_history()``.

    Args:
        query: The user's message.
        employee: The current user. Not read here; the task tools use the
            module-level ``employee`` instead. Kept for existing callers.

    Returns:
        The answer text.
    """
    global current_mode
    global conversation_history
    conversation_history.append(f"User: {query}")

    # One handler per specialised mode. The query engine returns a response
    # object rather than a string, so it is converted explicitly; otherwise
    # concatenating it with the "Switching to mode" prefix raises TypeError.
    handlers = {
        "documents": lambda text: str(query_engine.query(text)),
        "data": dbagent.run,
        "tasks": taskagent.run,
    }

    answer = ""
    if query.lower() == "exit":
        current_mode = "chat"
        answer = f"Switching to mode: {current_mode}"
    elif current_mode == "chat":
        tagged_input = tagging_chain.run(query)
        query_type = tagged_input.get('mode', current_mode)
        handler = handlers.get(query_type) if isinstance(query_type, str) else None
        if handler is not None:
            current_mode = query_type
            answer = f"Switching to mode: {current_mode}"
            answer += "\n" + handler(query)
        else:
            formatted_prompt = format_query(query_type, query, conversation_history)
            answer = normal_chat_chain.run(formatted_prompt)
    elif current_mode in handlers:
        answer = handlers[current_mode](query)

    conversation_history.append(f"Bot: {answer}")
    save_history()
    return answer
# Interactive entry point. Guarded so that importing this module (for example
# to reuse get_answer) does not block on input() and loop forever. The names
# assigned here stay module-level globals, which add_task and list_tasks rely
# on through ``employee``.
if __name__ == "__main__":
    employee_name = input("Please enter your name:\n> ")
    employee_role, employees = load_employee_role_from_json('employees.json', employee_name)
    employee = Employee(employee_name, employee_role, employees)
    while True:
        try:
            query = input(">")
        except (EOFError, KeyboardInterrupt):
            # End the session cleanly on Ctrl-D / Ctrl-C instead of a traceback.
            print()
            break
        answer = get_answer(query, employee)
        print(answer)