* add first results in Experiment Set
* fix repeated analyses
* fix nb_experiment_succes bug
* drop unused function
* format the displayed information
* add README content to the overview
* add per-query results in experiment
* add comments
* refactor
* add CSS style
* change colors

Co-authored-by: Audrey_CLEVY <[email protected]>
Co-authored-by: Adrien D. <[email protected]>
Commit c1a1ebe (1 parent: 6f73f16) — 7 changed files with 340 additions and 134 deletions.
The Streamlit entry point drops its inline `<style>` block in favor of the shared stylesheet:

```diff
@@ -1,17 +1,17 @@
 import streamlit as st
 from routes import ROUTES, get_page
+from utils import load_css

 # Configuration
 st.set_page_config(
     page_title="EG1 - Évaluation",
     page_icon="ui/demo_streamlit/static/images/eg1_logo.png",
     layout="wide",
     initial_sidebar_state="expanded",
 )

-pg = st.navigation([get_page(route) for route in ROUTES])
-st.set_page_config(layout="wide")
-custom_css = """
-<style>
-* {
-    font-size: 18px;
-}
-</style>
-"""
-st.markdown(custom_css, unsafe_allow_html=True)
+load_css("style.css")

+pg = st.navigation([get_page(route) for route in ROUTES])

 pg.run()
```
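The `load_css` helper imported from `utils` is referenced but not part of this diff; a minimal sketch of what such a helper presumably does, assuming it reads the stylesheet from disk and injects it via `st.markdown`:

```python
import streamlit as st

# Hypothetical sketch of utils.load_css — the real helper is not shown in this commit.
def load_css(file_name: str) -> None:
    with open(file_name) as f:
        # Inject the file contents into the page as an inline <style> tag.
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
```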
New stylesheet (78 additions):

```css
/* Colors */
:root {
    --principal-title: #000091;
    --level2-title: #313178;
    --level3-title: #273961;
    --button-main: #000091;
    --background-main: #F6F6F6;
    --text-main: #161616;
}

/* System font stack */
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
    background-color: var(--background-main);
    color: var(--text-main);
    line-height: 1.5;
}

/* Titles */
h1 {
    color: var(--principal-title);
    font-weight: 600;
}

h2 {
    color: var(--level2-title);
    font-weight: 600;
}

h3 {
    color: var(--level3-title);
    font-weight: 600;
}

/* Buttons */
.stButton>button {
    background-color: var(--button-main);
    color: white;
    border-radius: 4px;
    border: none;
    padding: 0.5rem 1rem;
    transition: background-color 0.2s ease;
}

.stButton>button:hover {
    background-color: #1565c0;
}

/* Widgets */
.stSelectbox, .stTextInput {
    border-radius: 4px;
    border: 1px solid var(--button-main);
}

/* DataFrames */
.dataframe {
    border-collapse: collapse;
    width: 100%;
    margin-bottom: 1rem;
}

.dataframe th {
    background-color: var(--principal-title);
    color: white;
    padding: 0.5rem;
    text-align: left;
}

.dataframe td {
    border: 1px solid #e0e0e0;
    padding: 0.5rem;
}

/* Sidebar */
.sidebar .sidebar-content {
    background-color: white;
    border-right: 1px solid #e0e0e0;
}
```
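Since the palette is centralized in `:root` custom properties, a re-theme only needs to override those variables rather than touch each rule; for example (hypothetical values):

```css
/* Hypothetical override: align the primary color with the darker hover tone. */
:root {
    --principal-title: #1565c0;
    --button-main: #1565c0;
}
```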
The experiments page is refactored: the plain fetch helpers and the loop-based aggregation give way to cached, type-annotated versions:

```diff
@@ -1,134 +1,137 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
+from typing import Dict, List, Tuple, Optional
 from utils import fetch


-def fetch_all_experiments():
-    endpoint = "/experiments"
-    return fetch("get", endpoint)
-
-
-def fetch_experiment_results(exp_id):
-    endpoint = f"/experiment/{exp_id}"
-    params = {"with_results": "true"}
-    return fetch("get", endpoint, params)
-
-
-def process_experiment_results(experiment):
-    results = experiment.get("results", [])
-    df_metrics = {}
-
-    for metric_results in results:
-        metric_name = metric_results["metric_name"]
-        arr = np.array(
-            [x["score"] for x in metric_results["observation_table"] if pd.notna(x["score"])]
-        )
-
-        if len(arr) > 0:
-            df = pd.DataFrame(
-                [
-                    [
-                        np.mean(arr),
-                        np.std(arr),
-                        np.median(arr),
-                        f"{arr.mean():.2f} ± {arr.std():.2f}",
-                        len(arr),
-                    ]
-                ],
-                columns=["mean", "std", "median", "mean_std", "support"],
-            )
-
-            df_metrics[metric_name] = df
-
+from io import StringIO
+
+# Constants for warnings
+FINISHED_STATUS = "finished"
+UNKNOWN_DATASET = "Unknown Dataset"
+UNKNOWN_MODEL = "Unknown Model"
+
+
+@st.cache_data
+def fetch_all_experiments() -> List[Dict]:
+    return fetch("get", "/experiments")
+
+
+@st.cache_data
+def fetch_experiment_results(exp_id: int) -> Dict:
+    return fetch("get", f"/experiment/{exp_id}", {"with_dataset": "true"})
```
```diff
+def process_experiment_data(response: Dict) -> Tuple[Optional[pd.DataFrame], str, str]:
+    if not response:
+        return None, UNKNOWN_DATASET, UNKNOWN_MODEL
+
+    df = pd.read_json(StringIO(response["dataset"]["df"]))
+
+    if "answers" in response:
+        df["answer"] = df.index.map({answer["num_line"]: answer["answer"] for answer in response["answers"]})
+
+    if "results" in response:
+        for result in response["results"]:
+            metric_name = result["metric_name"]
+            df[f"result_{metric_name}"] = df.index.map({obs["num_line"]: obs["score"] for obs in result["observation_table"]})
+
+    return df, response.get("dataset", {}).get("name", UNKNOWN_DATASET), response.get("model", {}).get("name", UNKNOWN_MODEL)
```
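`process_experiment_data` joins answers and per-metric scores onto the dataset rows by `num_line` through `pd.Index.map`, which accepts a dict and yields `NaN` where a row has no entry. A toy illustration (data invented):

```python
import pandas as pd

df = pd.DataFrame({"query": ["q0", "q1", "q2"]})
scores_by_line = {0: 0.9, 2: 0.4}  # num_line -> score
df["result_judge"] = df.index.map(scores_by_line)
print(df["result_judge"].tolist())  # [0.9, nan, 0.4]
```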
```diff
+def calculate_metric_stats(arr: np.array) -> Dict[str, float]:
+    return {
+        "mean": np.mean(arr),
+        "std": np.std(arr),
+        "median": np.median(arr),
+        "mean_std": f"{arr.mean():.2f} ± {arr.std():.2f}",
+        "support": len(arr)
+    }
+
+
+def process_experiment_results(experiment: Dict) -> pd.DataFrame:
+    df_metrics = {
+        metric_results["metric_name"]: pd.DataFrame([calculate_metric_stats(np.array([x["score"] for x in metric_results["observation_table"] if pd.notna(x["score"])]))])
+        for metric_results in experiment.get("results", [])
+        if len([x["score"] for x in metric_results["observation_table"] if pd.notna(x["score"])]) > 0
+    }
     return pd.DataFrame(
         {metric_name: df["mean_std"].iloc[0] for metric_name, df in sorted(df_metrics.items())},
-        index=[experiment["name"]],
+        index=[experiment["name"]]
     )
```
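A quick sanity check of `calculate_metric_stats` with invented scores, assuming the function above is in scope (`np.std` defaults to the population standard deviation, ddof=0):

```python
import numpy as np

stats = calculate_metric_stats(np.array([0.8, 0.9, 1.0]))
print(stats["mean_std"])  # "0.90 ± 0.08"  (mean 0.9, population std ≈ 0.0816)
print(stats["support"])   # 3
```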
The interactive `display_all_experiments` flow is removed in favor of a cached table builder:

```diff
-def display_all_experiments():
-    experiments = fetch_all_experiments()
-
-    if not experiments:
-        st.error("No experiments found.")
-        return
-
-    formatted_experiments = []
-
-    for exp in experiments:
-        if exp["experiment_status"] == "finished" and exp["experiment_set_id"] is None:
-            formatted_exp = {
-                "id": exp["id"],
-                "name": exp["name"],
-                "dataset": exp["dataset"]["name"],
-                "model": exp["model"]["name"] if exp["model"] else "N/A",
-            }
-
-            for result in exp.get("results", []):
-                metric_name = result["metric_name"]
-                scores = [
-                    obs["score"] for obs in result["observation_table"] if obs["score"] is not None
-                ]
-                if scores:
-                    avg_score = sum(scores) / len(scores)
-                    formatted_exp[f"{metric_name}_score"] = f"{avg_score:.2f}"
-
-            formatted_experiments.append(formatted_exp)
-
-    df = pd.DataFrame(formatted_experiments)
-
-    metric_columns = [col for col in df.columns if col.endswith("_score")]
-    df = df[df[metric_columns].notna().any(axis=1)]
-
-    st.dataframe(df)
-
-    if not df.empty:
-        selected_exp_id = st.selectbox(
-            "Select a finished experiment to view details:", df["id"].tolist()
-        )
-
-        if st.button("Show Selected Experiment Results"):
-            display_experiment_results(selected_exp_id)
-    else:
-        st.info("No finished experiments found.")
-
-
-def display_experiment_results(exp_id):
+@st.cache_data
+def preprocess_experiments(experiments: List[Dict]) -> pd.DataFrame:
+    formatted_experiments = [
+        {
+            "id": exp["id"],
+            "name": exp["name"],
+            "dataset": exp["dataset"]["name"],
+            "model": exp["model"]["name"] if exp["model"] else "N/A",
+            **{f"{result['metric_name']}_score": f"{sum(obs['score'] for obs in result['observation_table'] if obs['score'] is not None) / len([obs for obs in result['observation_table'] if obs['score'] is not None]):.2f}"
+               for result in exp.get("results", []) if any(obs['score'] is not None for obs in result['observation_table'])}
+        }
+        for exp in experiments
+        if exp["experiment_status"] == FINISHED_STATUS and exp["experiment_set_id"] is None
+    ]
+    return pd.DataFrame(formatted_experiments)
```
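The inline f-string in `preprocess_experiments` packs a per-metric average into one expression; unrolled, it is equivalent to this hypothetical helper:

```python
from typing import Dict, Optional

def mean_score(result: Dict) -> Optional[str]:
    # Average of the non-null scores for one metric, formatted to two decimals.
    scores = [obs["score"] for obs in result["observation_table"] if obs["score"] is not None]
    return f"{sum(scores) / len(scores):.2f}" if scores else None
```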
```diff
+def display_experiment_results(exp_id: int):
     experiment = fetch_experiment_results(exp_id)

     if not experiment:
         st.error(f"No results found for experiment {exp_id}")
         return

-    if experiment["experiment_status"] != "finished":
+    if experiment["experiment_status"] != FINISHED_STATUS:
         st.warning(f"Experiment {exp_id} is not finished yet...")

     if experiment["num_success"] != experiment["num_try"]:
-        st.warning("Warning: some experiments are failed.")
+        st.warning("Warning: some experiments have failed.")
     if experiment["num_observation_success"] != experiment["num_observation_try"]:
-        st.warning("Warning: some metrics are failed.")
+        st.warning("Warning: some metrics have failed.")

     results_df = process_experiment_results(experiment)
+    df_with_results, dataset_name, model_name = process_experiment_data(experiment)
+
+    cols = st.columns(3)
+    cols[0].write(f"**Dataset:** {dataset_name}")
+    cols[1].write(f"**Model:** {model_name}")

     if not results_df.empty:
+        st.subheader("Aggregated Results")
         st.dataframe(results_df)
+
+        st.subheader("Detailed Results")
+        st.dataframe(df_with_results)
     else:
         st.info("No results available for this experiment.")
```
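In the updated function, `st.columns(3)` creates three equal-width containers but only the first two are written to, leaving the third as spacing; the basic pattern:

```python
import streamlit as st

cols = st.columns(3)
cols[0].write("**Dataset:** demo")  # left column
cols[1].write("**Model:** demo")    # middle column; cols[2] stays empty
```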
`main` loses the radio/`number_input` branch; the finished-experiments table is shown directly:

```diff
 def main():
     st.title("Experiments (not in a Set)")
-    st.info("Here, you can see the experiments that are not in evaluation sets. ")
-
-    options_button = ["View All Experiments (finished)", "View Experiment by ID"]
-    view_option = st.radio("Select View Option", options_button)
+    st.info("Here, you can see the experiments that are not in evaluation sets.")

-    if view_option == "View All Experiments (finished)":
-        display_all_experiments()
+    st.subheader("All Experiments (finished)")
+    experiments = fetch_all_experiments()
+
+    if not experiments:
+        st.error("No experiments found.")
     else:
-        exp_id = st.number_input("Enter Experiment ID", min_value=1, step=1)
-        if st.button("Show Results"):
-            display_experiment_results(exp_id)
+        df = preprocess_experiments(experiments)
+
+        metric_columns = [col for col in df.columns if col.endswith("_score")]
+        df = df[df[metric_columns].notna().any(axis=1)]
+
+        st.dataframe(df)
+
+        st.divider()
+
+        if not df.empty:
+            st.markdown("### Select a finished experiment to view details:")
+            selected_exp_id = st.selectbox(
+                label="",
+                options=df["id"].tolist(),
+                format_func=lambda x: f"Experiment {x}",
+                label_visibility="collapsed"
+            )
+            if st.button("Show Selected Experiment Results"):
+                display_experiment_results(selected_exp_id)
+        else:
+            st.info("No finished experiments found.")


 main()
```