diff --git a/xpertai/prompts.py b/xpertai/prompts.py index b4e6c35..a5ecfd5 100644 --- a/xpertai/prompts.py +++ b/xpertai/prompts.py @@ -27,7 +27,7 @@ - Then, provide a summary of everything you described previously to describe the relationship between these features and the {observation}. You must sound like a scientist. Give scientific evidence for these answers and provide citations. -- Finally, provide the list of references used to answer. Use APA citation style for referencing. \n +- Finally, provide the list of references only used to answer. DO NOT make up references. Use APA style for referencing. \n Eg: References: \n 1. reference 1 \n 2. reference 2 \n diff --git a/xpertai/tools/explain_model.py b/xpertai/tools/explain_model.py index 2468d7e..7bd5153 100644 --- a/xpertai/tools/explain_model.py +++ b/xpertai/tools/explain_model.py @@ -10,6 +10,7 @@ def get_modelsummary(arg_dict): "model_type":, "top_k":, "XAI_tool": + "persist_directory": }. Example: @@ -22,6 +23,9 @@ def get_modelsummary(arg_dict): ''' save_dir = './data' + global persist_directory ; persist_directory = None + global clean ; clean = True + if not os.path.exists(save_dir): os.mkdir(save_dir) # arg_dict = json.loads(json_request) @@ -70,7 +74,8 @@ def get_modelsummary(arg_dict): f.close() metadata = {'Authors': 'XpertAI', 'Year': '2023', 'Title': 'XAI Summary'} - vector_db(lit_file=f'{save_dir}/XAI_summary.txt', clean=True, + vector_db(persist_directory=persist_directory, + lit_file=f'{save_dir}/XAI_summary.txt', clean=clean, metadatas=metadata) # Step 5: Generate summary of model explanation diff --git a/xpertai/tools/generate_nle.py b/xpertai/tools/generate_nle.py index 233ae02..b34fd80 100644 --- a/xpertai/tools/generate_nle.py +++ b/xpertai/tools/generate_nle.py @@ -11,6 +11,7 @@ def gen_nle(arg_dict): {"observation":, "XAI_tool": , "top_k": + "persist_directory": } Example: @@ -20,6 +21,7 @@ def gen_nle(arg_dict): ''' save_dir = './data' + global persist_directory ; persist_directory = "./data/chroma/" # arg_dict = json.loads(json_request) for k, val in arg_dict.items(): globals()[k] = val @@ -61,7 +63,7 @@ def gen_nle(arg_dict): # get human interpretable feature labels # #initiate retriever, chain llm = ChatOpenAI( - temperature=0.1, + temperature=0.0, model_name="gpt-4", request_timeout=1000) @@ -78,22 +80,19 @@ def gen_nle(arg_dict): # generate NLEs with citations features = ','.join(new_labels) + db = Chroma(persist_directory=persist_directory, + embedding_function=embedding) docs = [] # first collect docs for each feature for feature in new_labels: - initial_question = f"""It has been identified by XAI analysis {feature} have an impact on the {observation}. \n - Your task is to explain how the {observation} is affected by the {feature}. \ - How does each of these features impact the {observation}? - """ + initial_question = f"""How does the {feature} impact the {observation}?""" # Get relevant docs - - db = Chroma(persist_directory="./data/chroma/", - embedding_function=embedding) - - docs.append(db.max_marginal_relevance_search(initial_question)) + fetched = db.max_marginal_relevance_search(initial_question,k=4) + docs.append(fetched) # flatten list of docs docs = [item for sublist in docs for item in sublist] + # add citations from metadata documents = "" for i in range(len(docs)): @@ -101,7 +100,9 @@ def gen_nle(arg_dict): try: authors = docs[i].metadata["authors"] year = docs[i].metadata["year"] - documents += f"{doc} ({authors},{year}) \n\n" + title = docs[i].metadata["source"] + documents += f"{doc} REFERENCE:({authors},{year},{title}) \n\n" + except BaseException: documents += f"{doc} \n\n" diff --git a/xpertai/tools/utils.py b/xpertai/tools/utils.py index 7ed03d5..cdc0971 100644 --- a/xpertai/tools/utils.py +++ b/xpertai/tools/utils.py @@ -271,8 +271,8 @@ def explain_lime(df_init, model_path, model_type, top_k=2, def load_split_docs(filename, meta_data=None): r_splitter = RecursiveCharacterTextSplitter( - chunk_size=1500, - chunk_overlap=200, + chunk_size=500, + chunk_overlap=50, length_function=len ) docs = None @@ -293,8 +293,8 @@ def load_split_docs(filename, meta_data=None): return docs_split -def _create_vecdb(docs_split, persist_directory): - embedding = OpenAIEmbeddings() +def _create_vecdb(docs_split, persist_directory, embedding=None): + if embedding is None: embedding = OpenAIEmbeddings() vectordb = Chroma.from_documents( documents=docs_split, @@ -304,8 +304,9 @@ def _create_vecdb(docs_split, persist_directory): vectordb.persist() -def _update_vecdb(docs_split, persist_directory): - embedding = OpenAIEmbeddings() +def _update_vecdb(docs_split, persist_directory,embedding=None): + if embedding is None: embedding = OpenAIEmbeddings() + vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding) @@ -340,7 +341,8 @@ def _get_metadata(lit_file): def vector_db(persist_directory=None, lit_file=None, clean=False, try_meta_data=False, - metadatas=None): + metadatas=None, + embedding=None): if persist_directory is None: persist_directory = "./data/chroma/" @@ -355,7 +357,7 @@ def vector_db(persist_directory=None, if os.path.exists(persist_directory): shutil.rmtree(persist_directory) os.mkdir(persist_directory) - _create_vecdb(text_split, persist_directory) + _create_vecdb(text_split, persist_directory,embedding=embedding) else: - _update_vecdb(text_split, persist_directory) + _update_vecdb(text_split, persist_directory,embedding=embedding)