Skip to content

Commit

Permalink
Feat/result (#62)
Browse files Browse the repository at this point in the history
* add first Results in Experiment Set

* fix repeat analyze?

* fix problem with nb_experiment_succes

* drop unused function

* add format show information

* add read me in overview

* add result by query in experiment

* add comment

* add refacto

* add css style

* change color

---------

Co-authored-by: Audrey_CLEVY <[email protected]>
Co-authored-by: Adrien D. <[email protected]>
  • Loading branch information
3 people authored Jan 2, 2025
1 parent 6f73f16 commit c1a1ebe
Show file tree
Hide file tree
Showing 7 changed files with 340 additions and 134 deletions.
20 changes: 10 additions & 10 deletions ui/demo_streamlit/app.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import streamlit as st
from routes import ROUTES, get_page
from utils import load_css

# Configuration
st.set_page_config(
page_title="EG1 - Évaluation",
page_icon="ui/demo_streamlit/static/images/eg1_logo.png",
layout="wide",
initial_sidebar_state="expanded",
)

pg = st.navigation([get_page(route) for route in ROUTES])
st.set_page_config(layout="wide")
custom_css = """
<style>
* {
font-size: 18px;
}
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)
load_css("style.css")

pg = st.navigation([get_page(route) for route in ROUTES])

pg.run()
78 changes: 78 additions & 0 deletions ui/demo_streamlit/static/css/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/* Color */
:root {
--principal-title: #000091;
--level2-title: #313178;
--level3-title: #273961;
--button-main: #000091;
--background-main: #F6F6F6;
--text-main: #161616;
}

/* System Police */
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
background-color: var(--background-main);
color: var(--text-main);
line-height: 1.5;
}

/* Title */
h1 {
color: var(--principal-title);
font-weight: 600;
}

h2 {
color: var(--level2-title);
font-weight: 600;
}

h3 {
color: var(--level3-title);
font-weight: 600;
}

/* Button */
.stButton>button {
background-color: var(--button-main);
color: white;
border-radius: 4px;
border: none;
padding: 0.5rem 1rem;
transition: background-color 0.2s ease;
}

.stButton>button:hover {
background-color: #1565c0;
}

/* Widgets */
.stSelectbox, .stTextInput {
border-radius: 4px;
border: 1px solid var(--button-main);
}

/* DataFrames */
.dataframe {
border-collapse: collapse;
width: 100%;
margin-bottom: 1rem;
}

.dataframe th {
background-color: var(--principal-title);
color: white;
padding: 0.5rem;
text-align: left;
}

.dataframe td {
border: 1px solid #e0e0e0;
padding: 0.5rem;
}

/* Sidebar */
.sidebar .sidebar-content {
background-color: white;
border-right: 1px solid #e0e0e0;
}
Binary file added ui/demo_streamlit/static/images/eg1_logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions ui/demo_streamlit/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import requests
import streamlit as st

Expand All @@ -19,3 +20,7 @@ def fetch(method, endpoint, data=None):
else:
st.error(f"Failed to fetch data from {endpoint}.")
return None

def load_css(file_name):
with open(os.path.join("ui", "demo_streamlit", "static", "css", file_name)) as f:
st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
6 changes: 2 additions & 4 deletions ui/demo_streamlit/views/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ def main():
datasets = fetch("get", "/datasets")
if not datasets:
return


# Main content
main_content, right_menu = st.columns([8, 2])

# Main content
with main_content:
with st.container():
st.write("""Avalaible datasets
Expand All @@ -25,7 +25,6 @@ def main():
for dataset in datasets:
when = datetime.fromisoformat(dataset["created_at"]).strftime("%d %B %Y")
with st.container():
# Add an anchor for navigation
st.markdown(
f"<div id='{dataset['name'].lower().replace(' ', '-')}'></div>",
unsafe_allow_html=True,
Expand All @@ -44,7 +43,6 @@ def main():
st.caption(f"Created the {when}")
st.divider()

# Navigation menu
with right_menu:
st.markdown("###### Quick Navigation")
for dataset in datasets:
Expand Down
207 changes: 105 additions & 102 deletions ui/demo_streamlit/views/experiments.py
Original file line number Diff line number Diff line change
@@ -1,134 +1,137 @@
import streamlit as st
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Optional
from utils import fetch


def fetch_all_experiments():
endpoint = "/experiments"
return fetch("get", endpoint)


def fetch_experiment_results(exp_id):
endpoint = f"/experiment/{exp_id}"
params = {"with_results": "true"}
return fetch("get", endpoint, params)


def process_experiment_results(experiment):
results = experiment.get("results", [])
df_metrics = {}

for metric_results in results:
metric_name = metric_results["metric_name"]
arr = np.array(
[x["score"] for x in metric_results["observation_table"] if pd.notna(x["score"])]
)

if len(arr) > 0:
df = pd.DataFrame(
[
[
np.mean(arr),
np.std(arr),
np.median(arr),
f"{arr.mean():.2f} ± {arr.std():.2f}",
len(arr),
]
],
columns=["mean", "std", "median", "mean_std", "support"],
)

df_metrics[metric_name] = df

from io import StringIO

# Constants for warning
FINISHED_STATUS = "finished"
UNKNOWN_DATASET = "Unknown Dataset"
UNKNOWN_MODEL = "Unknown Model"

@st.cache_data
def fetch_all_experiments() -> List[Dict]:
return fetch("get", "/experiments")

@st.cache_data
def fetch_experiment_results(exp_id: int) -> Dict:
return fetch("get", f"/experiment/{exp_id}", {"with_dataset": "true"})

def process_experiment_data(response: Dict) -> Tuple[Optional[pd.DataFrame], str, str]:
if not response:
return None, UNKNOWN_DATASET, UNKNOWN_MODEL

df = pd.read_json(StringIO(response["dataset"]["df"]))

if "answers" in response:
df["answer"] = df.index.map({answer["num_line"]: answer["answer"] for answer in response["answers"]})

if "results" in response:
for result in response["results"]:
metric_name = result["metric_name"]
df[f"result_{metric_name}"] = df.index.map({obs["num_line"]: obs["score"] for obs in result["observation_table"]})

return df, response.get("dataset", {}).get("name", UNKNOWN_DATASET), response.get("model", {}).get("name", UNKNOWN_MODEL)

def calculate_metric_stats(arr: np.array) -> Dict[str, float]:
return {
"mean": np.mean(arr),
"std": np.std(arr),
"median": np.median(arr),
"mean_std": f"{arr.mean():.2f} ± {arr.std():.2f}",
"support": len(arr)
}

def process_experiment_results(experiment: Dict) -> pd.DataFrame:
df_metrics = {
metric_results["metric_name"]: pd.DataFrame([calculate_metric_stats(np.array([x["score"] for x in metric_results["observation_table"] if pd.notna(x["score"])]))])
for metric_results in experiment.get("results", [])
if len([x["score"] for x in metric_results["observation_table"] if pd.notna(x["score"])]) > 0
}
return pd.DataFrame(
{metric_name: df["mean_std"].iloc[0] for metric_name, df in sorted(df_metrics.items())},
index=[experiment["name"]],
index=[experiment["name"]]
)


def display_all_experiments():
experiments = fetch_all_experiments()

if not experiments:
st.error("No experiments found.")
return

formatted_experiments = []

for exp in experiments:
if exp["experiment_status"] == "finished" and exp["experiment_set_id"] is None:
formatted_exp = {
"id": exp["id"],
"name": exp["name"],
"dataset": exp["dataset"]["name"],
"model": exp["model"]["name"] if exp["model"] else "N/A",
}

for result in exp.get("results", []):
metric_name = result["metric_name"]
scores = [
obs["score"] for obs in result["observation_table"] if obs["score"] is not None
]
if scores:
avg_score = sum(scores) / len(scores)
formatted_exp[f"{metric_name}_score"] = f"{avg_score:.2f}"

formatted_experiments.append(formatted_exp)

df = pd.DataFrame(formatted_experiments)

metric_columns = [col for col in df.columns if col.endswith("_score")]
df = df[df[metric_columns].notna().any(axis=1)]

st.dataframe(df)

if not df.empty:
selected_exp_id = st.selectbox(
"Select a finished experiment to view details:", df["id"].tolist()
)

if st.button("Show Selected Experiment Results"):
display_experiment_results(selected_exp_id)
else:
st.info("No finished experiments found.")


def display_experiment_results(exp_id):
@st.cache_data
def preprocess_experiments(experiments: List[Dict]) -> pd.DataFrame:
formatted_experiments = [
{
"id": exp["id"],
"name": exp["name"],
"dataset": exp["dataset"]["name"],
"model": exp["model"]["name"] if exp["model"] else "N/A",
**{f"{result['metric_name']}_score": f"{sum(obs['score'] for obs in result['observation_table'] if obs['score'] is not None) / len([obs for obs in result['observation_table'] if obs['score'] is not None]):.2f}"
for result in exp.get("results", []) if any(obs['score'] is not None for obs in result['observation_table'])}
}
for exp in experiments
if exp["experiment_status"] == FINISHED_STATUS and exp["experiment_set_id"] is None
]
return pd.DataFrame(formatted_experiments)

def display_experiment_results(exp_id: int):
experiment = fetch_experiment_results(exp_id)

if not experiment:
st.error(f"No results found for experiment {exp_id}")
return

if experiment["experiment_status"] != "finished":
if experiment["experiment_status"] != FINISHED_STATUS:
st.warning(f"Experiment {exp_id} is not finished yet...")

if experiment["num_success"] != experiment["num_try"]:
st.warning("Warning: some experiments are failed.")
st.warning("Warning: some experiments have failed.")
if experiment["num_observation_success"] != experiment["num_observation_try"]:
st.warning("Warning: some metrics are failed.")
st.warning("Warning: some metrics have failed.")

results_df = process_experiment_results(experiment)
df_with_results, dataset_name, model_name = process_experiment_data(experiment)

cols = st.columns(3)
cols[0].write(f"**Dataset:** {dataset_name}")
cols[1].write(f"**Model:** {model_name}")

if not results_df.empty:
st.subheader("Aggregated Results")
st.dataframe(results_df)

st.subheader("Detailed Results")
st.dataframe(df_with_results)
else:
st.info("No results available for this experiment.")


def main():
st.title("Experiments (not in a Set)")
st.info("Here, you can see the experiments that are not in evaluation sets. ")

options_button = ["View All Experiments (finished)", "View Experiment by ID"]
view_option = st.radio("Select View Option", options_button)
st.info("Here, you can see the experiments that are not in evaluation sets.")

if view_option == "View All Experiments (finished)":
display_all_experiments()
st.subheader("All Experiments (finished)")
experiments = fetch_all_experiments()

if not experiments:
st.error("No experiments found.")
else:
exp_id = st.number_input("Enter Experiment ID", min_value=1, step=1)
if st.button("Show Results"):
display_experiment_results(exp_id)
df = preprocess_experiments(experiments)

metric_columns = [col for col in df.columns if col.endswith("_score")]
df = df[df[metric_columns].notna().any(axis=1)]

st.dataframe(df)

st.divider()

if not df.empty:
st.markdown("### Select a finished experiment to view details:")
selected_exp_id = st.selectbox(
label="",
options=df["id"].tolist(),
format_func=lambda x: f"Experiment {x}",
label_visibility="collapsed"
)
if st.button("Show Selected Experiment Results"):
display_experiment_results(selected_exp_id)
else:
st.info("No finished experiments found.")

main()

Loading

0 comments on commit c1a1ebe

Please sign in to comment.