
Fully functional UI for finding the eval-config which works best for your score. Includes the ability to run the eval-config-eval.
scosman committed Feb 26, 2025
1 parent ee1318e commit 1133e1a
Showing 5 changed files with 385 additions and 381 deletions.
13 changes: 13 additions & 0 deletions app/web_ui/src/lib/utils/formatters.ts
@@ -1,3 +1,5 @@
import { type EvalConfigType } from "$lib/types"

export function formatDate(dateString: string | undefined): string {
  if (!dateString) {
    return "Unknown"
@@ -40,3 +42,14 @@ export function formatDate(dateString: string | undefined): string {
    .replace(" PM", "pm")
    .replace(",", "")
}

export function eval_config_to_ui_name(
  eval_config_type: EvalConfigType,
): string {
  return (
    {
      g_eval: "G-Eval",
      llm_as_judge: "LLM as Judge",
    }[eval_config_type] || eval_config_type
  )
}
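
For context, a minimal usage sketch of the new helper (the call sites below are illustrative, not part of the diff):

```ts
import { eval_config_to_ui_name } from "$lib/utils/formatters"

// Known types map to friendly display names; anything else
// falls through to the raw type string.
eval_config_to_ui_name("g_eval") // "G-Eval"
eval_config_to_ui_name("llm_as_judge") // "LLM as Judge"
```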
@@ -8,7 +8,6 @@
  import FormElement from "$lib/utils/form_element.svelte"
  import type {
    EvalConfig,
    EvalConfigType,
    ProviderModels,
    TaskRunConfig,
    EvalResultSummary,
@@ -29,6 +28,8 @@
  import Warning from "$lib/ui/warning.svelte"
  import { string_to_json_key } from "$lib/utils/json_schema_editor/json_schema_templates"
  import InfoTooltip from "$lib/ui/info_tooltip.svelte"
  import RunEval from "./run_eval.svelte"
  import { eval_config_to_ui_name } from "$lib/utils/formatters"

  $: project_id = $page.params.project_id
  $: task_id = $page.params.task_id
@@ -218,15 +219,6 @@
    value: string
  }

  function eval_config_to_ui_name(eval_config_type: EvalConfigType): string {
    return (
      {
        g_eval: "G-Eval",
        llm_as_judge: "LLM as Judge",
      }[eval_config_type] || eval_config_type
    )
  }
  // A name for the eval config that is human readable and helpful.
  // Combines its memorable name with its properties.
  function get_eval_config_name(
@@ -349,72 +341,12 @@
    return results
  }

  let run_dialog: Dialog | null = null
  let running_progress_dialog: Dialog | null = null
  let eval_run_error: KilnError | null = null
  let eval_state:
    | "not_started"
    | "running"
    | "complete"
    | "complete_with_errors" = "not_started"
  let eval_complete_count = 0
  let eval_total_count = 0
  let eval_error_count = 0
  function run_eval(): boolean {
    if (!current_eval_config_id) {
      eval_run_error = new KilnError("No eval config selected", null)
      eval_state = "complete_with_errors"
      // True to close the run dialog, and then show the error in the progress dialog
      running_progress_dialog?.show()
      return true
    }

    score_summary = null
    eval_state = "running"
    eval_complete_count = 0
    eval_total_count = 0
    eval_error_count = 0

    const eventSource = new EventSource(
      `${base_url}/api/projects/${project_id}/tasks/${task_id}/eval/${eval_id}/eval_config/${current_eval_config_id}/run?all_run_configs=true`,
    )

    eventSource.onmessage = (event) => {
      try {
        if (event.data === "complete") {
          // Special end message
          eventSource.close()
          eval_state =
            eval_error_count > 0 ? "complete_with_errors" : "complete"
          get_score_summary()
        } else {
          const data = JSON.parse(event.data)
          eval_complete_count = data.progress
          eval_total_count = data.total
          eval_error_count = data.errors
          eval_state = "running"
        }
      } catch (error) {
        eval_run_error = createKilnError(error)
        eval_state = "complete_with_errors"
        get_score_summary()
      }
    }

    // Don't restart on an error (default SSE behavior)
    eventSource.onerror = (error) => {
      eventSource.close()
      eval_state = "complete_with_errors"
      eval_run_error = createKilnError(error)
      get_score_summary()
    }

    // Switch over to the progress dialog, closing the run dialog
    running_progress_dialog?.show()
    return true
  }
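
For reference, the shape of each progress message the removed handler parses, inferred from the field accesses above (the server side is not part of this diff, and the comments are assumptions beyond those field names):

```ts
// Each SSE message is either the literal string "complete" (end of
// stream) or JSON with these fields, per the onmessage handler above.
type EvalProgressMessage = {
  progress: number // runs completed so far
  total: number // total runs planned
  errors: number // runs that errored
}
```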
  $: run_eval_url = `${base_url}/api/projects/${project_id}/tasks/${task_id}/eval/${eval_id}/eval_config/${current_eval_config_id}/run?all_run_configs=true`

  let task_run_config_model_name = ""
  let task_run_config_provider_name = ""
@@ -561,8 +493,7 @@
<div class="grow">
<div class="text-xl font-bold">Results Summary</div>
<div class="text-xs text-gray-500">
Overview of how various task run configs perform on the selected
evaluator{current_eval_config
How various task run configs perform on the selected evaluator{current_eval_config
? ` (${current_eval_config.name})`
: ""}.
</div>
@@ -581,31 +512,15 @@
          add_task_config_dialog?.show()
        }}>Add Run Config</button
      >
      <button
        class="btn btn-mid btn-primary"
        on:click={() => {
          run_dialog?.show()
        }}>Run Eval</button
      >
    {:else}
      <button
        class="btn btn-mid"
        on:click={() => {
          running_progress_dialog?.show()
        }}
      >
        {#if eval_state === "running"}
          <div class="loading loading-spinner loading-xs"></div>
          Running...
        {:else if eval_state === "complete"}
          Eval Complete
        {:else if eval_state === "complete_with_errors"}
          Eval Complete with Errors
        {:else}
          Eval Status
        {/if}
      </button>
    {/if}
    <RunEval
      bind:eval_state
      bind:run_url={run_eval_url}
      on_run_complete={() => {
        console.log("run complete")
        get_score_summary()
      }}
    />
  </div>
</div>
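
The new run_eval.svelte component (added in this commit but not shown in this diff) takes over the run/progress dialogs removed above. A sketch of the props its call site implies, as a Svelte `<script lang="ts">` block; everything beyond the three bindings used above is an assumption:

```ts
// Inferred from `bind:eval_state`, `bind:run_url`, and `on_run_complete`
// at the call site; not the component's actual source.
export let run_url: string
export let eval_state:
  | "not_started"
  | "running"
  | "complete"
  | "complete_with_errors" = "not_started"
export let on_run_complete: () => void = () => {}
```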

@@ -787,79 +702,3 @@
{/if}
</div>
</Dialog>

<Dialog
  bind:this={running_progress_dialog}
  title="Eval Progress"
  action_buttons={eval_state === "complete" ||
  eval_state === "complete_with_errors"
    ? [
        {
          label: "Close",
          isCancel: true,
          isPrimary: false,
        },
      ]
    : []}
>
  <div
    class="mt-12 mb-6 flex flex-col items-center justify-center min-h-[100px] text-center"
  >
    {#if eval_state === "complete"}
      <div class="font-medium">Eval Complete 🎉</div>
      {#if eval_total_count == 0}
        <div class="text-gray-500 text-sm mt-2">
          No evals were run, because everything was already up to date!
        </div>
      {/if}
    {:else if eval_state === "complete_with_errors"}
      <div class="font-medium">Eval Complete with Errors</div>
    {:else if eval_state === "running"}
      <div class="loading loading-spinner loading-lg text-success"></div>
      <div class="font-medium mt-4">Running...</div>
    {/if}
    <div class="text-sm font-light min-w-[120px]">
      {#if eval_total_count > 0}
        <div>
          {eval_complete_count + eval_error_count} of {eval_total_count}
        </div>
      {/if}
      {#if eval_error_count > 0}
        <div class="text-error font-light text-xs">
          {eval_error_count} error{eval_error_count === 1 ? "" : "s"}
        </div>
      {/if}
      {#if eval_run_error}
        <div class="text-error font-light text-xs mt-2">
          {eval_run_error.getMessage() || "An unknown error occurred"}
        </div>
      {/if}
    </div>
  </div>
</Dialog>

<Dialog
  bind:this={run_dialog}
  title="Run Eval"
  action_buttons={[
    {
      label: "Cancel",
      isCancel: true,
    },
    {
      label: "Run Eval",
      action: run_eval,
      isPrimary: true,
    },
  ]}
>
  <div class="flex flex-col gap-2 font-light mt-4">
    <div>Run this eval with the selected configuration?</div>
    <div>Don't close this page if you want to monitor progress.</div>
    <Warning
      warning_color="warning"
      warning_message="This may use considerable compute/credits."
      tight={true}
    />
  </div>
</Dialog>
