Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds ragas metrics #20

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 117 additions & 24 deletions pages/2_🗂️_Annual_Report_Analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,24 @@
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core.schema import Document
from llama_index.core.node_parser import UnstructuredElementNodeParser

from ragas.metrics import (
answer_relevancy,
faithfulness,
context_recall,
context_precision,
)
from datasets import Dataset
from ragas import evaluate
import pandas as pd
from src.utils import get_model

from src.fields2 import (
fiscal_year, fiscal_year_attributes,
strat_outlook, strat_outlook_attributes,
risk_management, risk_management_attributes,
innovation, innovation_attributes
innovation, innovation_attributes,fiscal_year_questions,
strat_outlook_questions,risk_management_questions,
innovation_questions
)

import streamlit as st
Expand Down Expand Up @@ -66,6 +76,8 @@ def get_vector_index(nodes, vector_store):





def generate_insight(engine, insight_name, section_name, output_format):

with open("prompts/report.prompt", "r") as f:
Expand All @@ -79,41 +91,63 @@ def generate_insight(engine, insight_name, section_name, output_format):
formatted_input = prompt_template.format(insight_name=insight_name, section_name=section_name, output_format=output_format)
print(formatted_input)
response = engine.query(formatted_input)
return response.response
# print(len(response.source_nodes))
return response



def report_insights(engine, section_name, fields_to_include, section_num):
    """Generate the selected insights for one report section.

    Alongside the insights, this collects one ragas evaluation row per
    generated insight (answer, retrieved context, question, ground truth).

    Args:
        engine: Query engine used to produce the answers shown to the user.
        section_name: Section title substituted into the report prompt.
        fields_to_include: Flags parallel to the section's attribute list;
            a truthy entry means the insight at that position is generated.
        section_num: Section selector: 1 = fiscal year, 2 = strategy
            outlook, 3 = risk management, 4 = innovation.

    Returns:
        A tuple ``(result, rageseval)``. ``result`` is
        ``{"insights": {field: answer_text}}``. ``rageseval`` is a dict with
        the keys ``"answer"``, ``"contexts"``, ``"question"`` and
        ``"ground_truth"``, each holding a list with one entry per generated
        insight, in generation order.

    Raises:
        ValueError: If ``section_num`` is not one of 1, 2, 3 or 4.
    """
    if section_num == 1:
        fields = fiscal_year
        attribs = fiscal_year_attributes
        questions = fiscal_year_questions
    elif section_num == 2:
        fields = strat_outlook
        attribs = strat_outlook_attributes
        questions = strat_outlook_questions
    elif section_num == 3:
        fields = risk_management
        attribs = risk_management_attributes
        questions = risk_management_questions
    elif section_num == 4:
        fields = innovation
        attribs = innovation_attributes
        questions = innovation_questions
    else:
        # Fail loudly instead of iterating over None further down.
        raise ValueError(
            f"section_num must be 1, 2, 3 or 4, got {section_num!r}"
        )

    rageseval = {
        "answer": [],
        "contexts": [],
        "question": [],
        "ground_truth": [],
    }
    ins = {}

    # The second engine produces the reference ("ground truth") answers.
    # It is built lazily so nothing is constructed when no field is selected.
    # NOTE(review): "openai-1" is presumably meant to select the non-default
    # model in get_model -- confirm against src/utils.py.
    ground_truth_engine = None

    # zip() stops at the shorter sequence, so a short flag list cannot
    # raise IndexError.
    for field, include in zip(attribs, fields_to_include):
        if not include:
            continue

        if ground_truth_engine is None:
            ground_truth_engine = get_query_engine(
                st.session_state.index.as_query_engine(similarity_top_k=3),
                "openai-1",
            )

        output_format = str({field: fields[field]})
        response = generate_insight(engine, field, section_name, output_format)
        gtobject = generate_insight(
            ground_truth_engine, field, section_name, output_format
        )

        ins[field] = response.response

        # Only the first retrieved node is recorded as context; an empty
        # retrieval yields an empty context list rather than an IndexError.
        source_nodes = response.source_nodes
        context = [source_nodes[0].node.text] if source_nodes else []

        rageseval["answer"].append(response.response)
        rageseval["contexts"].append(context)
        rageseval["question"].append(questions[field])
        rageseval["ground_truth"].append(gtobject.response)

    print("Printing ragesval : ")
    print(rageseval)
    return {
        "insights": ins
    }, rageseval

def get_query_engine(engine):
llm = get_model("openai")
def get_query_engine(engine,model_name):
llm = get_model(model_name)
service_context = ServiceContext.from_defaults(llm=llm)


query_engine_tools = [
QueryEngineTool(
query_engine=engine,
Expand Down Expand Up @@ -213,24 +247,36 @@ def get_query_engine(engine):
innovation_focus = st.toggle("Innovation Focus")

innovation_and_rd_list = [r_and_d_activities, innovation_focus]


results={}
result=None
with col2:
if st.button("Analyze Report"):
engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3),"openai")
print(st.session_state.index.service_context)
start_time = time.time()

with st.status("**Analyzing Report...**"):

if any(fiscal_year_highlights_list):
st.write("Fiscal Year Highlights...")

for i, insight in enumerate(fiscal_year_attributes):
if st.session_state[insight]:
fiscal_year_highlights_list[i] = False

response = report_insights(engine, "Fiscal Year Highlights", fiscal_year_highlights_list, 1)

print(insight)
response,ragesval = report_insights(engine, "Fiscal Year Highlights", fiscal_year_highlights_list, 1)
dataframe=pd.DataFrame(ragesval)
df=Dataset.from_dict(dataframe)
result = evaluate(
df,
metrics=[
context_precision,
faithfulness,
answer_relevancy,
context_recall,
],
)
results["Fiscal Year Highlights"]=result
print("Printing Results: ")
print(results)
for key, value in response["insights"].items():
st.session_state[key] = value

Expand All @@ -240,8 +286,21 @@ def get_query_engine(engine):
for i, insight in enumerate(strat_outlook_attributes):
if st.session_state[insight]:
strategy_outlook_future_direction_list[i] = False
response = report_insights(engine, "Strategy Outlook and Future Direction", strategy_outlook_future_direction_list, 2)

response,ragesval = report_insights(engine, "Strategy Outlook and Future Direction", strategy_outlook_future_direction_list, 2)
dataframe=pd.DataFrame(ragesval)
df=Dataset.from_dict(dataframe)
result = evaluate(
df,
metrics=[
context_precision,
faithfulness,
answer_relevancy,
context_recall,
],
)
results["Strategy Outlook and Future Direction"]=result
print("Printing Results: ")
print(results)
for key, value in response["insights"].items():
st.session_state[key] = value

Expand All @@ -253,8 +312,21 @@ def get_query_engine(engine):
if st.session_state[insight]:
risk_management_list[i] = False

response = report_insights(engine, "Risk Management", risk_management_list, 3)

response,ragesval = report_insights(engine, "Risk Management", risk_management_list, 3)
dataframe=pd.DataFrame(ragesval)
df=Dataset.from_dict(dataframe)
result = evaluate(
df,
metrics=[
context_precision,
faithfulness,
answer_relevancy,
context_recall,
],
)
results["Risk Management"]=result
print("Printing Results: ")
print(results)
for key, value in response["insights"].items():
st.session_state[key] = value

Expand All @@ -265,7 +337,22 @@ def get_query_engine(engine):
if st.session_state[insight]:
innovation_and_rd_list[i] = False

response = report_insights(engine, "Innovation and R&D", innovation_and_rd_list, 4)
response,ragesval = report_insights(engine, "Innovation and R&D", innovation_and_rd_list, 4)
dataframe=pd.DataFrame(ragesval)
df=Dataset.from_dict(dataframe)
result = evaluate(
df,
metrics=[
context_precision,
faithfulness,
answer_relevancy,
context_recall,
],
)
results["Innovation and R&D"]=result
print("Printing Results: ")
print(results)

st.session_state.innovation_and_rd = response

for key, value in response["insights"].items():
Expand Down Expand Up @@ -314,7 +401,8 @@ def get_query_engine(engine):
st.error("This insight has not been generated")
# st.write("### Milestone Achievements")
# st.write(str(st.session_state.fiscal_year_highlights.milestone_achievements))

if results:
st.write(results["Fiscal Year Highlights"].to_pandas())
with tab2:
st.write("## Strategy Outlook and Future Direction")
try:
Expand Down Expand Up @@ -347,6 +435,8 @@ def get_query_engine(engine):
st.error("Product Roadmap insight has not been generated")
except:
st.error("This insight has not been generated")
if results:
st.write(results["Strategy Outlook and Future Direction"].to_pandas())

with tab3:
st.write("## Risk Management")
Expand All @@ -369,7 +459,8 @@ def get_query_engine(engine):
st.error("Risk Mitigation insight has not been generated")
except:
st.error("This insight has not been generated")

if results:
st.write(results["Risk Management"].to_pandas())

with tab4:
st.write("## Innovation and R&D")
Expand All @@ -392,3 +483,5 @@ def get_query_engine(engine):
st.error("Innovation Focus insight has not been generated")
except:
st.error("This insight has not been generated")
if results:
st.write(results["Innovation and R&D"].to_pandas())
16 changes: 15 additions & 1 deletion src/fields2.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@

fiscal_year_attributes = ["performance_highlights", "major_events", "challenges_encountered"]

# Evaluation question for each fiscal-year attribute, keyed by attribute name.
fiscal_year_questions = {
    "performance_highlights": "What are the performance highlights?",
    "major_events": "Describe the major events and their impact ?",
    "challenges_encountered": "What are the challenges encountered in this financial year ?",
}

strat_outlook = {
"strategic_initiatives": "The company's primary objectives and growth strategies for the upcoming years.",
"market_outlook": "Insights into the broader market, competitive landscape, and industry trends the company anticipates.",
Expand All @@ -46,16 +50,26 @@

strat_outlook_attributes = ["strategic_initiatives", "market_outlook", "product_roadmap"]

# Evaluation question for each strategy-outlook attribute, keyed by attribute name.
strat_outlook_questions = {
    "strategic_initiatives": "What are the Strategic initiatives being discussed for the fiscal year",
    "market_outlook": "What is the market outlook for the fiscal year",
    "product_roadmap": "Elaborate on the product roadmap for the fiscal year",
}

risk_management = {
"risk_factors": "Primary risks the company acknowledges.",
"risk_mitigation": "Strategies for managing these risks."
}

risk_management_attributes = ["risk_factors", "risk_mitigation"]

# Evaluation question for each risk-management attribute, keyed by attribute name.
risk_management_questions = {
    "risk_factors": "What are the risk factors faced by the company ?",
    "risk_mitigation": "What are the measures being taken to mitigate the risks ? ",
}

innovation = {
"r_and_d_activities": "Overview of the company's focus on research and development, major achievements, or breakthroughs.",
"innovation_focus": "Mention of new technologies, patents, or areas of research the company is diving into."
}

innovation_attributes = ["r_and_d_activities", "innovation_focus"]
innovation_attributes = ["r_and_d_activities", "innovation_focus"]

# Evaluation question for each innovation / R&D attribute, keyed by attribute name.
innovation_questions = {
    "r_and_d_activities": "Provide an overview on all the Research and Development activities,breakthroughs and acheievment",
    "innovation_focus": " What are the new technologies,patents and areas of research the company is working on ?",
}
2 changes: 2 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def get_model(model_name):
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if model_name == "openai":
model = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")
else:
model=ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o")
return model

def process_pdf(pdfs):
Expand Down