From 234bcde34b5951233681455faeb92baaaef97573 Mon Sep 17 00:00:00 2001
From: "Justin W. Lin"
Date: Sat, 13 Jul 2024 12:52:19 -0700
Subject: [PATCH] [eval] Add IMO problems with exact answers (#1528)

---
Reviewer notes (this region is ignored when the patch is applied):
  * samples.jsonl is added as a 3-line Git LFS pointer, not as the data itself.
  * imo_exact_answers.yaml registers the eval and points the
    evals.elsuite.basic.includes:Includes class at that samples file.

 evals/registry/data/imo_exact_answers/samples.jsonl | 3 +++
 evals/registry/evals/imo_exact_answers.yaml         | 9 +++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 evals/registry/data/imo_exact_answers/samples.jsonl
 create mode 100644 evals/registry/evals/imo_exact_answers.yaml

diff --git a/evals/registry/data/imo_exact_answers/samples.jsonl b/evals/registry/data/imo_exact_answers/samples.jsonl
new file mode 100644
index 0000000000..ca5a5dac4a
--- /dev/null
+++ b/evals/registry/data/imo_exact_answers/samples.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc97a4694cf576bf00540fae940906f5c63547fe421210738079072801e70b8f
+size 18492
diff --git a/evals/registry/evals/imo_exact_answers.yaml b/evals/registry/evals/imo_exact_answers.yaml
new file mode 100644
index 0000000000..4d72c0e5cf
--- /dev/null
+++ b/evals/registry/evals/imo_exact_answers.yaml
@@ -0,0 +1,9 @@
+imo_exact_answers:
+  id: imo_exact_answers.dev.v0
+  description: A small set of IMO problems that have exact answers (e.g. yes/no, number, fraction).
+  metrics: [accuracy]
+
+imo_exact_answers.dev.v0:
+  class: evals.elsuite.basic.includes:Includes
+  args:
+    samples_jsonl: imo_exact_answers/samples.jsonl