Merge pull request #69 from etalab-ia/feat/expset_status_bar
feat(expset): add experiments 'finished' and 'failure' ratio in overview
dtrckd authored Dec 20, 2024
2 parents 38c588a + e5f320c commit 669caea
Showing 4 changed files with 56 additions and 25 deletions.
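For context before the per-file diffs: the new overview derives its "Finished" and "Failure" percentages from per-experiment observation counts, where the expected number of observations is dataset size times number of metrics, summed over the set. A condensed sketch of that arithmetic, not part of the commit, with made-up counts for illustration:

# Illustration only (not part of the commit): the ratio arithmetic used by the new overview.
experiments = [
    # dataset size, metric count, observation tries, observation successes (made-up numbers)
    {"size": 100, "num_metrics": 3, "obs_try": 300, "obs_success": 280},
    {"size": 50, "num_metrics": 2, "obs_try": 60, "obs_success": 60},
]
observation_length = sum(e["size"] * e["num_metrics"] for e in experiments)  # 400 expected observations
total_tries = sum(e["obs_try"] for e in experiments)                         # 360
total_successes = sum(e["obs_success"] for e in experiments)                 # 340
finished_ratio = total_successes * 100 // observation_length                 # 85 -> "Finished: 85%"
failure_ratio = (total_tries - total_successes) * 100 // observation_length  # 5  -> "Failure: 5%"
print(f"Finished: {finished_ratio}%  Failure: {failure_ratio}%")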
8 changes: 6 additions & 2 deletions api/models.py
@@ -130,7 +130,7 @@ class ObservationTable(Base):
result = relationship("Result", back_populates="observation_table")

__table_args__ = (
UniqueConstraint('num_line', 'result_id', name='_metric_num_line_unique_constraint'),
UniqueConstraint("num_line", "result_id", name="_metric_num_line_unique_constraint"),
)


@@ -151,7 +151,7 @@ class Answer(Base):
experiment = relationship("Experiment", back_populates="answers")

__table_args__ = (
UniqueConstraint('num_line', 'experiment_id', name='_answer_num_line_unique_constraint'),
UniqueConstraint("num_line", "experiment_id", name="_answer_num_line_unique_constraint"),
)


@@ -176,6 +176,10 @@ def num_observation_try(self):
def num_observation_success(self):
return sum(result.num_success for result in self.results)

@property
def num_metrics(self):
return len(self.results)

# One
dataset_id = Column(Integer, ForeignKey("datasets.id"))
dataset = relationship("Dataset")
1 change: 0 additions & 1 deletion api/runners/tasks.py
@@ -170,7 +170,6 @@ def generate_observation(message: dict):
# Fix SQL schema error with np.float64 !
score = float(score)
elif score is not None:
score = None
raise ValueError("Unsupported score type: %s %s" % (type(score), score))

# Upsert observation
3 changes: 3 additions & 0 deletions api/schemas.py
@@ -309,6 +309,9 @@ class Experiment(ExperimentBase):
num_observation_success: int = Field(
description="How many metric observations were successfully generated."
)
num_metrics: int = Field(
description="How many metrics are associated to this experiment. See the query parameter `with_results` to get the results per metrics."
)

dataset: Dataset
model: Model | None
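The new `num_metrics` field lets the UI compute the expected number of observations per experiment; per the field description, per-metric results are only returned when the `with_results` query parameter is set. A hypothetical read of the field from the client side, not part of the commit: the `fetch` import path is a guess, and the response is assumed to follow the Experiment schema above.

# Hypothetical sketch (not part of the commit). Assumes the demo UI's fetch helper
# (import path is a guess) and a response shaped like the Experiment schema above.
from utils import fetch  # assumed import path

exp_id = 1  # placeholder experiment id
exp = fetch("get", f"/experiment/{exp_id}", {"with_results": "true"})
if exp:
    print(f"{exp['name']}: {exp['num_metrics']} metrics, "
          f"{exp['num_observation_success']}/{exp['num_observation_try']} observations scored")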
69 changes: 47 additions & 22 deletions ui/demo_streamlit/views/experiments_set.py
@@ -5,6 +5,33 @@
from io import StringIO


def _get_expset_status(expset: dict) -> tuple[dict, dict]:
status_codes = {
"pending": {"text": "Experiments did not start yet", "color": "yellow"},
"running": {"text": "Experiments are running", "color": "orange"},
"finished": {"text": "All experiments are finished", "color": "green"},
}

counts = dict(
total_answer_tries=sum(exp["num_try"] for exp in expset["experiments"]),
total_answer_successes=sum(exp["num_success"] for exp in expset["experiments"]),
total_observation_tries=sum(exp["num_observation_try"] for exp in expset["experiments"]),
total_observation_successes=sum(exp["num_observation_success"] for exp in expset["experiments"]),
answer_length=sum(exp["dataset"]["size"] for exp in expset["experiments"]),
observation_length=sum(exp["dataset"]["size"]*exp["num_metrics"] for exp in expset["experiments"]),
) # fmt: skip

# Running status
if all(exp["experiment_status"] == "pending" for exp in expset["experiments"]):
status = status_codes["pending"]
elif all(exp["experiment_status"] == "finished" for exp in expset["experiments"]):
status = status_codes["finished"]
else:
status = status_codes["running"]

return status, counts


def get_experiment_data(exp_id):
response = fetch("get", f"/experiment/{exp_id}", {"with_dataset": "true"})
if not response:
@@ -29,8 +56,20 @@ def get_experiment_data(exp_id):


def display_experiment_set_overview(expset, experiments_df):
status, counts = _get_expset_status(expset)
st.write(f"## Overview of experiment set: {expset['name']}")
st.write(f"experiment_set id: {expset['id']}")
finished_ratio = counts["total_observation_successes"] * 100 // counts["observation_length"]
st.markdown(f"Finished: {finished_ratio}%", unsafe_allow_html=True)
failure_ratio = (
(counts["total_observation_tries"] - counts["total_observation_successes"])
* 100
// counts["observation_length"]
)
if failure_ratio > 0:
st.markdown(
f"Failure: <span style='color:red;'>{failure_ratio}%</span>", unsafe_allow_html=True
)

row_height = 35
header_height = 35
@@ -58,31 +97,13 @@ def display_experiment_set_result(expset, experiments_df):

def display_experiment_sets(experiment_sets):
cols = st.columns(3)
status_codes = {
"pending": {"text": "Experiments did not start yet", "color": "yellow"},
"running": {"text": "Experiments are running", "color": "orange"},
"finished": {"text": "All experiments are finished", "color": "green"},
}

for idx, exp_set in enumerate(experiment_sets):
total_answer_tries = sum(exp["num_try"] for exp in exp_set["experiments"])
total_answer_successes = sum(exp["num_success"] for exp in exp_set["experiments"])
total_observation_tries = sum(exp["num_observation_try"] for exp in exp_set["experiments"])
total_observation_successes = sum(
exp["num_observation_success"] for exp in exp_set["experiments"]
)

# Running status
if all(exp["experiment_status"] == "pending" for exp in exp_set["experiments"]):
status = status_codes["pending"]
elif all(exp["experiment_status"] == "finished" for exp in exp_set["experiments"]):
status = status_codes["finished"]
else:
status = status_codes["running"]
status, counts = _get_expset_status(exp_set)

# Failure status
has_failure = False
if total_observation_tries > total_observation_successes:
if counts["total_observation_tries"] > counts["total_observation_successes"]:
has_failure = True

status_description = status["text"]
@@ -120,11 +141,15 @@ def display_experiment_sets(experiment_sets):
with st.expander("Failure Analysis", expanded=False):
for exp in exp_set["experiments"]:
if exp["num_try"] != exp["num_success"]:
st.write(f"id: {exp['id']} name: {exp['name']} (failed on output generation)")
st.write(
f"id: {exp['id']} name: {exp['name']} (failed on output generation)"
)
continue

if exp["num_observation_try"] != exp["num_observation_success"]:
st.write(f"id: {exp['id']} name: {exp['name']} (failed on score computation)")
st.write(
f"id: {exp['id']} name: {exp['name']} (failed on score computation)"
)
continue


