
Fully functional UI for finding the eval-config which works best for your score. Includes the ability to run the eval-config-eval.
scosman committed Feb 26, 2025
1 parent ee1318e commit 1133e1a
Showing 5 changed files with 385 additions and 381 deletions.
13 changes: 13 additions & 0 deletions app/web_ui/src/lib/utils/formatters.ts
@@ -1,3 +1,5 @@
import { type EvalConfigType } from "$lib/types"

export function formatDate(dateString: string | undefined): string {
  if (!dateString) {
    return "Unknown"
@@ -40,3 +42,14 @@ export function formatDate(dateString: string | undefined): string {
    .replace(" PM", "pm")
    .replace(",", "")
}

export function eval_config_to_ui_name(
  eval_config_type: EvalConfigType,
): string {
  return (
    {
      g_eval: "G-Eval",
      llm_as_judge: "LLM as Judge",
    }[eval_config_type] || eval_config_type
  )
}
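
For context, a minimal usage sketch of the new helper (the call sites below are illustrative, not part of the diff):

```ts
import { eval_config_to_ui_name } from "$lib/utils/formatters"

// Known types map to friendly display names; anything else
// falls through to the raw type string.
eval_config_to_ui_name("g_eval") // "G-Eval"
eval_config_to_ui_name("llm_as_judge") // "LLM as Judge"
```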
@@ -8,7 +8,6 @@
  import FormElement from "$lib/utils/form_element.svelte"
  import type {
    EvalConfig,
    EvalConfigType,
    ProviderModels,
    TaskRunConfig,
    EvalResultSummary,
@@ -29,6 +28,8 @@
  import Warning from "$lib/ui/warning.svelte"
  import { string_to_json_key } from "$lib/utils/json_schema_editor/json_schema_templates"
  import InfoTooltip from "$lib/ui/info_tooltip.svelte"
  import RunEval from "./run_eval.svelte"
  import { eval_config_to_ui_name } from "$lib/utils/formatters"

  $: project_id = $page.params.project_id
  $: task_id = $page.params.task_id
@@ -218,15 +219,6 @@
    value: string
  }

  function eval_config_to_ui_name(eval_config_type: EvalConfigType): string {
    return (
      {
        g_eval: "G-Eval",
        llm_as_judge: "LLM as Judge",
      }[eval_config_type] || eval_config_type
    )
  }
  // A name for the eval config that is human readable and helpful.
  // Combines its memorable name with its properties.
  function get_eval_config_name(
@@ -349,72 +341,12 @@
    return results
  }

  let run_dialog: Dialog | null = null
  let running_progress_dialog: Dialog | null = null
  let eval_run_error: KilnError | null = null
  let eval_state:
    | "not_started"
    | "running"
    | "complete"
    | "complete_with_errors" = "not_started"
  let eval_complete_count = 0
  let eval_total_count = 0
  let eval_error_count = 0
  function run_eval(): boolean {
    if (!current_eval_config_id) {
      eval_run_error = new KilnError("No eval config selected", null)
      eval_state = "complete_with_errors"
      // True to close the run dialog, and then show the error in the progress dialog
      running_progress_dialog?.show()
      return true
    }

    score_summary = null
    eval_state = "running"
    eval_complete_count = 0
    eval_total_count = 0
    eval_error_count = 0

    const eventSource = new EventSource(
      `${base_url}/api/projects/${project_id}/tasks/${task_id}/eval/${eval_id}/eval_config/${current_eval_config_id}/run?all_run_configs=true`,
    )

    eventSource.onmessage = (event) => {
      try {
        if (event.data === "complete") {
          // Special end message
          eventSource.close()
          eval_state =
            eval_error_count > 0 ? "complete_with_errors" : "complete"
          get_score_summary()
        } else {
          const data = JSON.parse(event.data)
          eval_complete_count = data.progress
          eval_total_count = data.total
          eval_error_count = data.errors
          eval_state = "running"
        }
      } catch (error) {
        eval_run_error = createKilnError(error)
        eval_state = "complete_with_errors"
        get_score_summary()
      }
    }

    // Don't restart on an error (default SSE behavior)
    eventSource.onerror = (error) => {
      eventSource.close()
      eval_state = "complete_with_errors"
      eval_run_error = createKilnError(error)
      get_score_summary()
    }

    // Switch over to the progress dialog, closing the run dialog
    running_progress_dialog?.show()
    return true
  }
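
For reference, the shape of each progress message the removed handler parses, inferred from the field accesses above (the server side is not part of this diff, and the comments are assumptions beyond those field names):

```ts
// Each SSE message is either the literal string "complete" (end of
// stream) or JSON with these fields, per the onmessage handler above.
type EvalProgressMessage = {
  progress: number // runs completed so far
  total: number // total runs planned
  errors: number // runs that errored
}
```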
  $: run_eval_url = `${base_url}/api/projects/${project_id}/tasks/${task_id}/eval/${eval_id}/eval_config/${current_eval_config_id}/run?all_run_configs=true`

  let task_run_config_model_name = ""
  let task_run_config_provider_name = ""
@@ -561,8 +493,7 @@
<div class="grow">
<div class="text-xl font-bold">Results Summary</div>
<div class="text-xs text-gray-500">
Overview of how various task run configs perform on the selected
evaluator{current_eval_config
How various task run configs perform on the selected evaluator{current_eval_config
? ` (${current_eval_config.name})`
: ""}.
</div>
@@ -581,31 +512,15 @@
          add_task_config_dialog?.show()
        }}>Add Run Config</button
      >
      <button
        class="btn btn-mid btn-primary"
        on:click={() => {
          run_dialog?.show()
        }}>Run Eval</button
      >
    {:else}
      <button
        class="btn btn-mid"
        on:click={() => {
          running_progress_dialog?.show()
        }}
      >
        {#if eval_state === "running"}
          <div class="loading loading-spinner loading-xs"></div>
          Running...
        {:else if eval_state === "complete"}
          Eval Complete
        {:else if eval_state === "complete_with_errors"}
          Eval Complete with Errors
        {:else}
          Eval Status
        {/if}
      </button>
    {/if}
    <RunEval
      bind:eval_state
      bind:run_url={run_eval_url}
      on_run_complete={() => {
        console.log("run complete")
        get_score_summary()
      }}
    />
  </div>
</div>
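
The new run_eval.svelte component (added in this commit but not shown in this diff) takes over the run/progress dialogs removed above. A sketch of the props its call site implies, as a Svelte `<script lang="ts">` block; everything beyond the three bindings used above is an assumption:

```ts
// Inferred from `bind:eval_state`, `bind:run_url`, and `on_run_complete`
// at the call site; not the component's actual source.
export let run_url: string
export let eval_state:
  | "not_started"
  | "running"
  | "complete"
  | "complete_with_errors" = "not_started"
export let on_run_complete: () => void = () => {}
```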

@@ -787,79 +702,3 @@
{/if}
</div>
</Dialog>

<Dialog
  bind:this={running_progress_dialog}
  title="Eval Progress"
  action_buttons={eval_state === "complete" ||
  eval_state === "complete_with_errors"
    ? [
        {
          label: "Close",
          isCancel: true,
          isPrimary: false,
        },
      ]
    : []}
>
  <div
    class="mt-12 mb-6 flex flex-col items-center justify-center min-h-[100px] text-center"
  >
    {#if eval_state === "complete"}
      <div class="font-medium">Eval Complete 🎉</div>
      {#if eval_total_count == 0}
        <div class="text-gray-500 text-sm mt-2">
          No evals were run, because everything was already up to date!
        </div>
      {/if}
    {:else if eval_state === "complete_with_errors"}
      <div class="font-medium">Eval Complete with Errors</div>
    {:else if eval_state === "running"}
      <div class="loading loading-spinner loading-lg text-success"></div>
      <div class="font-medium mt-4">Running...</div>
    {/if}
    <div class="text-sm font-light min-w-[120px]">
      {#if eval_total_count > 0}
        <div>
          {eval_complete_count + eval_error_count} of {eval_total_count}
        </div>
      {/if}
      {#if eval_error_count > 0}
        <div class="text-error font-light text-xs">
          {eval_error_count} error{eval_error_count === 1 ? "" : "s"}
        </div>
      {/if}
      {#if eval_run_error}
        <div class="text-error font-light text-xs mt-2">
          {eval_run_error.getMessage() || "An unknown error occurred"}
        </div>
      {/if}
    </div>
  </div>
</Dialog>

<Dialog
  bind:this={run_dialog}
  title="Run Eval"
  action_buttons={[
    {
      label: "Cancel",
      isCancel: true,
    },
    {
      label: "Run Eval",
      action: run_eval,
      isPrimary: true,
    },
  ]}
>
  <div class="flex flex-col gap-2 font-light mt-4">
    <div>Run this eval with the selected configuration?</div>
    <div>Don't close this page if you want to monitor progress.</div>
    <Warning
      warning_color="warning"
      warning_message="This may use considerable compute/credits."
      tight={true}
    />
  </div>
</Dialog>
