diff --git a/src/aviary/core.py b/src/aviary/core.py index 587e76b5..6170a7bd 100644 --- a/src/aviary/core.py +++ b/src/aviary/core.py @@ -40,6 +40,7 @@ EvalAnswerMode, encode_image_to_base64, eval_answer, + extract_answer, is_coroutine_callable, partial_format, ) @@ -82,6 +83,7 @@ "encode_image_to_base64", "eval_answer", "eval_answer", + "extract_answer", "fenv", "is_coroutine_callable", "join", diff --git a/src/aviary/utils.py b/src/aviary/utils.py index 35687bd2..b495f18b 100644 --- a/src/aviary/utils.py +++ b/src/aviary/utils.py @@ -3,10 +3,9 @@ import inspect import io import random -import re import string from ast import literal_eval -from collections.abc import Awaitable, Callable, Sequence +from collections.abc import Sequence from enum import StrEnum from typing import TYPE_CHECKING, Any, ClassVar, Literal, Self, cast @@ -21,8 +20,8 @@ import numpy as np -DEFAULT_EVAL_MODEL_NAME = "gpt-4o" -LLM_BOOL_EVAL_CONFIG = { +DEFAULT_EVAL_MODEL_NAME = "gpt-4o-mini" +LLM_BOOL_EVAL_CONFIG: dict[str, Any] = { "prompt": ( "Here is a question, the correct answer to the question, and a proposed answer" " to the question. Please tell me if the proposed answer is correct, given the" @@ -35,6 +34,18 @@ "temperature": 0, } +LLM_EXTRACT_CONFIG = LLM_BOOL_EVAL_CONFIG | { + "prompt": ( + "You are evaluating answers for a test which has fixed options. " + "Repeat back which option the proposed answer matches. " + "GIVE ONLY THE VERBATIM TEXT OF A FIXED OPTION. " + "If the proposed answer is empty, invalid, or ambiguous, " + "return an empty string." + "\n\nOptions:\n{options}" + "\n\nProposed answer: {proposed_answer}" + ) +} + LLM_SCORE_EVAL_CONFIG = LLM_BOOL_EVAL_CONFIG | { "prompt": ( "Here is a question, the correct answer to the question, and a rubric for" @@ -175,21 +186,36 @@ async def eval_answer( raise RuntimeError(f"Invalid evaluation mode: {eval_mode}") +async def extract_answer( + proposed_answer: str, options: Sequence[str], llm_eval_config: dict | None = None +) -> str | None: + """Extract the answer matching a proposal from a list of options using an LLM.""" + for option in options: + if proposed_answer.strip().casefold() == option.strip().casefold(): + return option + + default_config = LLM_EXTRACT_CONFIG + config = llm_eval_config or default_config + response_msg = await run_prompt( + prompt=config.get("prompt", default_config["prompt"]).format( + options="\n".join(options), + proposed_answer=proposed_answer, + ), + model=config.get("model", default_config["model"]), + temperature=config.get("temperature", default_config["temperature"]), + ) + answer = response_msg.strip().casefold() # noqa: FURB184 + for option in options: + if answer == option.strip().casefold(): + return option + return None + + _CAPITAL_A_INDEX = ord("A") class MultipleChoiceQuestion(BaseModel): QUESTION_PROMPT_TEMPLATE: ClassVar[str] = "Q: {question}\n\nOptions:\n{options}" - # TODO: combine with above eval_answer and its prompts - EVALUATION_PROMPT_TEMPLATE: ClassVar[str] = ( - "Given the following question and a proposed answer to the question, return the" - " single-letter choice in the question that matches the proposed answer." - " If the proposed answer is blank or an empty string," - " or multiple options are matched, respond with '0'." 
- "\n\nQuestion: {qa_prompt}" - "\n\nProposed Answer: {qa_answer}" - "\n\nSingle Letter Answer:" - ) DEFAULT_UNSURE_OPTION: ClassVar[str] = ( "Insufficient information to answer this question" ) @@ -280,18 +306,14 @@ def split_options(options: str) -> list[str]: return split_options async def grade( - self, answer: str, prompt_runner: Callable[[str], Awaitable[str]] | None = None - ) -> "tuple[MultipleChoiceEvaluation, str, str]": - if prompt_runner is None: - prompt_runner = run_prompt - eval_prompt = self.EVALUATION_PROMPT_TEMPLATE.format( - qa_prompt=self.question_prompt, qa_answer=answer - ) - raw_evaluation = await prompt_runner(eval_prompt) - evaluation, parsed_answer = MultipleChoiceEvaluation.from_answer( - raw_evaluation, self + self, proposed_answer: str + ) -> "tuple[MultipleChoiceEvaluation, str | None]": + extracted_answer = await extract_answer( + proposed_answer=proposed_answer, options=self.options ) - return evaluation, raw_evaluation, parsed_answer + return MultipleChoiceEvaluation.from_answer( + extracted_answer, self + ), extracted_answer class MultipleChoiceEvaluation(StrEnum): @@ -323,32 +345,19 @@ def calculate_accuracy_precision( @classmethod def from_answer( - cls, answer: str, question: MultipleChoiceQuestion - ) -> "tuple[MultipleChoiceEvaluation, str]": + cls, extracted_answer: str | None, question: MultipleChoiceQuestion + ) -> "MultipleChoiceEvaluation": """Make an evaluation from the input answer and multiple choice question. Returns: - Two-tuple of answer enum and the raw answer extracted from the input answer. + Evaluation corresponding to the parsed answer. """ - # SEE: https://regex101.com/r/vcE9Hb/1 - letter_search = re.search(r"([A-Z])\)?", answer, re.DOTALL) - # Get the letter answer, or fail over to the first non-whitespace char - answer_char = ( - letter_search.group(1) - if letter_search is not None - else answer.split()[0][0].upper() - ) - answer_letter_index = ord(answer_char[0]) - _CAPITAL_A_INDEX - if answer_letter_index < 0 or answer_letter_index > len(question.options): - # The result extracted was not in the options (e.g. '0') - return cls.INCORRECT, answer_char + if extracted_answer is None: + return MultipleChoiceEvaluation.INCORRECT # From here, if we don't match either the ideal or the unsure multiple choice # options then we declare the answer as incorrect. 
- if ( - question.unsure_answer_index is not None - and answer_letter_index == question.unsure_answer_index - ): - return cls.UNSURE, cast(str, question.unsure_answer) - if answer_letter_index == question.ideal_answer_index: - return cls.CORRECT, question.ideal_answer - return cls.INCORRECT, question.options[answer_letter_index] + if extracted_answer == question.ideal_answer: + return MultipleChoiceEvaluation.CORRECT + if question.unsure_answer and extracted_answer == question.unsure_answer: + return MultipleChoiceEvaluation.UNSURE + return MultipleChoiceEvaluation.INCORRECT diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-llm-has-innate-knowledge].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-llm-has-innate-knowledge].yaml index be2df091..617568d2 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-llm-has-innate-knowledge].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-llm-has-innate-knowledge].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - the meaning of life?\n\nOptions:\nA) -84\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 11\nE) 42\n\nProposed Answer: 14\n\nSingle - Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. 
If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-84\nInsufficient information + to answer this question\ncheesecake\n11\n42\n\nProposed answer: 14", "role": + "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "513" + - "442" content-type: - application/json host: @@ -36,7 +35,7 @@ interactions: x-stainless-raw-response: - "true" x-stainless-retry-count: - - "1" + - "0" x-stainless-runtime: - CPython x-stainless-runtime-version: @@ -46,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJLa8MwEITv/hVC57goqamd3HpooPQBORVSilGktaNW1gpJoY+Q/15k - u7FDU+jFh/l2xrNr7xNCqJJ0QajY8iAaq9Prav24NO7rNmMKH55ulvd8VaxXHu/ejaOT6MDNK4jw - 47oQ2FgNQaHpsHDAA8TUaX6ZZTnLi3kLGpSgo622Ic0wnbFZlrIiZVe9cYtKgKcL8pwQQsi+fcaK - RsIHXRA2+VEa8J7XQBfHIUKoQx0Vyr1XPnAT6GSAAk0A07ZmY91BtfM81jI7rXv9cHyRxto63Pie - H/VKGeW3pQPu0cRQH9DSlh4SQl7ahXYnHal12NhQBnwDEwOnrOjy6HDCEe1ZwMD12DSfnIkrJQSu - tB9dhAoutiAH63A+vpMKRyAZLf27zLnsbnFl6v/ED0AIsAFkaR1IJU4XHsYcxB/sr7HjkdvC1H/6 - AE1ZKVODs05137iyJc/nspBcTCuaHJJvAAAA//8DAGY5XevsAgAA + H4sIAAAAAAAAAwAAAP//jFLLTsMwELznK6w9Nyh9P24V0AMHBBK9gFDk2pvU4NiWveVV9d+R00da + tUhcfJjZGc+svU4YAyVhwkAsOYnK6XRafF7fzuWPeLwZP4v7p5maTwd3GD6yB5pBKyrs4g0F7VVX + wlZOIylrtrTwyAmja3vY7fX7o357VBOVlaijrHSU9mxaKaPSTtbppdkwbY926qVVAgNM2EvCGGPr + +ow5jcQvmLCstUcqDIGXCJPDEGPgrY4I8BBUIG4IWg0prCE0dfRj2GOxCjxGMyutd/jmcI+2pfN2 + EXb8AS+UUWGZe+TBmugZyDqo2U3C2GvdZ3USEZy3laOc7DuaaDjqb+2g2WJD7qoCWeL6gubELJdI + XOlwtA4QXCxRnhkyBnwllT0ikqPK51kueW9rK1P+x74hhEBHKHPnUSpxsW9tHr/YX2OHFdeBIXwH + wiovlCnRO6+2D1y4vDvmvUyMBzyDZJP8AgAA//8DADaBBszuAgAA headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fde1cf88cf1b-SJC + - 8f425bb2ac70f953-SJC Connection: - keep-alive Content-Encoding: @@ -65,9 +64,15 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:29 GMT + - Wed, 18 Dec 2024 21:48:38 GMT Server: - cloudflare + Set-Cookie: + - __cf_bm=Z3Wkkk2LQA2GKAPZVirKPYLTJfmm9Luttv26RxPBKro-1734558518-1.0.1.1-4BZR47qupd.QCWRMrfyj_F2lS0fqBEuzxwPZTqYPUxSKwdzL4S_8YWk9ofOPXhFEnkMN6nwgWjBLjAR4nioxiQ; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=B7CeJKL1WXveU2pmeUGy_AFjPsbf25SvdiSN_4fxTXE-1734558518441-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -79,25 +84,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "363" + - "144" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999874" + - "149999896" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_aff8daa48aa43d3df077f97da6136e5a + - req_503cd8163bd0d3b634eb723d6874b1da status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-no-llm-innate-knowledge].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-no-llm-innate-knowledge].yaml index 38077163..77357e4c 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-no-llm-innate-knowledge].yaml +++ 
b/tests/cassettes/TestLitQAEvaluation.test_grade[didnt-match-and-no-llm-innate-knowledge].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - my office''s zip code?\n\nOptions:\nA) -8\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 94106\nE) 94107\n\nProposed Answer: the answer - is 14004\n\nSingle Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-8\nInsufficient information + to answer this question\ncheesecake\n94106\n94107\n\nProposed answer: the answer + is 14004", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "536" + - "464" content-type: - application/json host: @@ -36,7 +35,7 @@ interactions: x-stainless-raw-response: - "true" x-stainless-retry-count: - - "1" + - "0" x-stainless-runtime: - CPython x-stainless-runtime-version: @@ -46,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJLT8MwEITv+RWWzw1KS2lCb3BCSDykcuAhFBl7kxocr2Vveajqf0dO - 2yQVIHHJYb6dyewm64QxrhWfMy6XgmTjTHpWPV6fFzf1Jcwebu4WF4tbunKFwvzj/n3GR9GBL68g - ae86ktg4A6TRbrH0IAhi6jg/nk7zLC9OWtCgAhNttaN0iukkm0zTrEizXa5copYQ+Jw9JYwxtm6f - saJV8MnnLBvtlQZCEDXweTfEGPdoosJFCDqQsMRHPZRoCWzbOhvqHqpVELGWXRmz0zfdiwzWzuNL - 2PFOr7TVYVl6EAFtDA2Ejrd0kzD23C60OujIncfGUUn4BjYGjscn2zzen3BAd4yQhBmaZqNf4koF - JLQJg4twKeQSVG/tzydWSuMAJIOlf5b5LXu7uLb1f+J7ICU4AlU6D0rLw4X7MQ/xB/trrDtyW5iH - r0DQlJW2NXjn9fYbV64U+akqlJDjiieb5BsAAP//AwBRMcSQ7AIAAA== + H4sIAAAAAAAAAwAAAP//jFLLbsIwELznK6w9kyqB8MoNoR7aAxdKVamqImNvElPHtmyjPhD/XjlQ + AoJKvfgwszOeWXsXEQKCQ06A1dSzxsh4Vn7M71fZMntcjAbfz6uXjTSbh9mCPk2Xc+gFhV5vkPlf + 1R3TjZHohVYHmlmkHoNrOh5kw+FkmE5aotEcZZBVxseZjhuhRNxP+lmcjON0clTXWjB0kJPXiBBC + du0ZciqOn5CTpPeLNOgcrRDy0xAhYLUMCFDnhPNUeeh1JNPKo2qjn8MWy62jIZraSnnE96d7pK6M + 1Wt35E94KZRwdWGROq2Cp/PaQMvuI0Le2j7bi4hgrG6ML7x+RxUMp+nBDrotduSxKnjtqbyhuTAr + OHoqpDtbBzDKauRXhoQA3XKhz4jorPJ1llveh9pCVf+x7wjG0HjkhbHIBbvZtzUPX+yvsdOK28Dg + vpzHpiiFqtAaKw4PXJpiVLI0wTTBNUT76AcAAP//AwBkI2np7gIAAA== headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdc63fbf9e53-SJC + - 8f425bb11b702519-SJC Connection: - keep-alive Content-Encoding: @@ -65,9 +64,15 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:25 GMT + - Wed, 18 Dec 2024 21:48:38 GMT Server: - cloudflare + Set-Cookie: + - __cf_bm=6j4w6Jnsg0wGsZf61WcNCvHdr1Vcb6uVLFFhTQQgcv4-1734558518-1.0.1.1-D0vsT8nCM66xiA.Xa6ijXpgeGPM65Iux2KhQqUiD8wToq.VmwT03dnkmELw1qn0GvHJvh8g7H6WkqYzXVgs2Xg; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=LFVOxysXKxTPNQ2KK05aqbBnIRDPc45hskCPkFcOjXA-1734558518178-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -79,25 +84,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "212" + - 
"131" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999868" + - "149999890" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_afd8c66d84f3b42a8cd2b8a6bf855054 + - req_12c5e1cdb8b2ba32b075f04f20194421 status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer1].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer1].yaml index 057ef1d0..6865d713 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer1].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer1].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - my office''s zip code?\n\nOptions:\nA) -8\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 94106\nE) 94107\n\nProposed Answer: \n\nSingle - Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-8\nInsufficient information + to answer this question\ncheesecake\n94106\n94107\n\nProposed answer: ", "role": + "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "517" + - "445" content-type: - application/json host: @@ -46,18 +45,16 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLLTsMwELznKyyfG+S2gaa5FegFJKRKnEAocp1NaurYlr3hoar/jpyG - JBUgcfFhZmc8O/YhIoTKgmaEih1HUVsVr8qnh/XbzfvdI7LN7WzF9vf8Mik31xtYN3QSFGb7CgK/ - VRfC1FYBSqNPtHDAEYLrdDFPkgVbpGlL1KYAFWSVxTgx8YzNkpilMbvqhDsjBXiakeeIEEIO7Rki - 6gI+aEbY5BupwXteAc36IUKoMyoglHsvPXKNdDKQwmgE3aZmY9xB2XgeYulGqQ4/9hcpU1lntr7j - e7yUWvpd7oB7o4OpR2Npyx4jQl7ahZqzjNQ6U1vM0exBB8MpW5786FDhiO04NMjVCJ52LZzb5QUg - l8qPGqGCix0Ug3SojzeFNCMiGi39M8xv3qfFpa7+Yz8QQoBFKHLroJDifOFhzEH4YH+N9SW3gan/ - 9Ah1XkpdgbNOnt64tPmy5Fu+LNk8pdEx+gIAAP//AwDTwVpp7AIAAA== + H4sIAAAAAAAAAwAAAP//jFJdS8MwFH3vrwj3eZV2X46+6RAR0T2JikjJkts2miYhSVEZ+++Srms3 + NsGXPJxzz8k5N9lEhIDgkBFgFfWsNjK+Kr6WN6vb8XL+9LqSxj3Qu2R1ff/8yCfiBUZBodcfyPxe + dcF0bSR6odWOZhapx+CaXk6ms9lili5aotYcZZCVxsdTHddCiXicjKdxchmni05dacHQQUbeIkII + 2bRnyKk4fkNGktEeqdE5WiJk/RAhYLUMCFDnhPNUeRgNJNPKo2qjH8IWi8bREE01Unb4tr9H6tJY + vXYd3+OFUMJVuUXqtAqezmsDLbuNCHlv+zRHEcFYXRufe/2JKhgu5js7GLY4kF1V8NpTeUZzZJZz + 9FRId7AOYJRVyE8MCQHacKEPiOig8mmWc9672kKV/7EfCMbQeOS5scgFO9u3NQ9f7K+xfsVtYHA/ + zmOdF0KVaI0VuwcuTD4vWJpgmuAaom30CwAA//8DAL0A1qzuAgAA headers: - CF-Cache-Status: - - DYNAMIC CF-RAY: - - 8f39fddcea1d251d-SJC + - 8f425bb5de5996de-SJC Connection: - keep-alive Content-Encoding: @@ -65,7 +62,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:28 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -76,28 +73,30 @@ 
interactions: - X-Request-ID alt-svc: - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC openai-organization: - future-house-xr4tdh openai-processing-ms: - - "174" + - "233" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999872" + - "149999896" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_4d40eb2c66dfd308a7b75c7cd80c405b + - req_0c845e0049332bd1fa73fdbe76005ea1 status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer2].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer2].yaml index a0acce15..4a0fa4ae 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer2].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer2].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - the meaning of life?\n\nOptions:\nA) -84\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 11\nE) 42\n\nProposed Answer: \n\nSingle Letter - Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. 
If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-84\nInsufficient information + to answer this question\ncheesecake\n11\n42\n\nProposed answer: ", "role": "user"}], + "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "511" + - "440" content-type: - application/json host: @@ -46,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJRT4MwFIXf+RVNn4eBicPx5oszWTJjfNBoDOnaC9SVtmlLMrPsv5sW - HCzOxBceznfP4dwLhwghzBkuEKYNcbTVIr6r3jar1eb5IXtKb7brPL9/3e/a5fxFrOtHPPMOtf0E - 6n5cV1S1WoDjSvaYGiAOfGqaX2dZnuTLJIBWMRDeVmsXZyqeJ/MsTm7jZDEYG8UpWFyg9wghhA7h - 6StKBntcoBATlBasJTXg4jSEEDZKeAUTa7l1RDo8GyFV0oEMrZOpbqDqLPG1ZCfEoB9PLxKq1kZt - 7cBPesUlt01pgFglfah1SuNAjxFCH2Gh7qwj1ka12pVO7UD6wDRZ9Hl4POGEDswpR8TUlM8uxJUM - HOHCTi6CKaENsNE6no90jKsJiCZL/y5zKbtfnMv6P/EjoBS0A1ZqA4zT84XHMQP+B/tr7HTkUBjb - L+ugLSsuazDa8P4bV7ok+ZLdMkLTCkfH6BsAAP//AwBwbnWk7AIAAA== + H4sIAAAAAAAAAwAAAP//jFJda8IwFH3vrwj32Q6tLTrfhjB8FhyyMUpMbttomoQk3Qfifx+ptXXo + YC95OOeek3NucowIAcFhQYBV1LPayPip+Fw+rz5e6221zta4fKENiu1qszkk2z2MgkLv9sj8RfXA + dG0keqHVmWYWqcfgOplN0yybZ5PHlqg1RxlkpfFxquNaKBEn4ySNx7N4Mu/UlRYMHSzIW0QIIcf2 + DDkVxy9YkPHogtToHC0RFv0QIWC1DAhQ54TzVHkYDSTTyqNqo1/DFovG0RBNNVJ2+Km/R+rSWL1z + Hd/jhVDCVblF6rQKns5rAy17igh5b/s0vyKCsbo2Pvf6gCoYztOzHQxbHMiuKnjtqbyj+WWWc/RU + SHe1DmCUVchvDAkB2nChr4joqvJtlnve59pClf+xHwjG0HjkubHIBbvbtzUPX+yvsX7FbWBw385j + nRdClWiNFecHLkzOxwnPppNdOoPoFP0AAAD//wMAMCnsc+4CAAA= headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fde81f05ceb1-SJC + - 8f425bb72f9b67dc-SJC Connection: - keep-alive Content-Encoding: @@ -65,7 +64,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:30 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -79,25 +78,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "332" + - "532" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999875" + - "149999896" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_817ca7ae018d7baa48236c7ad4f4f151 + - req_ed9d0e7998f792094d5aefe723693f28 status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer3].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer3].yaml index d70cc972..f6e5e085 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer3].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[empty-answer3].yaml @@ -1,14 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. 
If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What method - was used to demonstrate that the enzyme PafA is stable after incubation with - 4M urea for 14 days?\n\nOptions:\nA) cryo EM\nB) Insufficient information to - answer this question\nC) NMR\nD) x-ray crystallography\nE) circular dichroism\n\nProposed - Answer: \n\nSingle Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\ncryo EM\nInsufficient information + to answer this question\nNMR\nx-ray crystallography\ncircular dichroism\n\nProposed + answer: ", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -17,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "624" + - "472" content-type: - application/json host: @@ -47,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJLNboMwEITvPIXlM1SEkJJyS9VIlfpz6SmpKuSYhTg1tmVvpEZR3r0y - IUDVVOqFw3w7w+zCMSCEipLmhPItQ94YGS2q9eujmL9NXtaJWKW7wxM+r+TD8rC4ny5p6B16swOO - F9cN142RgEKrM+YWGIJPnWTTNM3i7G7SgkaXIL2tNhilOkriJI3ieRTfdsatFhwczcl7QAghx/bp - K6oSvmhO4vCiNOAcq4Hm/RAh1GrpFcqcEw6ZQhoOkGuFoNrW8Vi3UO0d87XUXspOP/Uvkro2Vm9c - x3u9Ekq4bWGBOa18qENtaEtPASEf7UL7Hx2psboxWKD+BOUDJ9PknEeHE45ox1Ajk2PTNLwSV5SA - TEg3ugjljG+hHKzD+di+FHoEgtHSv8tcyz4vLlT9n/gBcA4GoSyMhVLwnwsPYxb8D/bXWH/ktjB1 - B4fQFJVQNVhjxfkbV6aosvkMNrMqzWhwCr4BAAD//wMANO06tewCAAA= + H4sIAAAAAAAAAwAAAP//jFLLbsIwELznK6w9kypAKIRbH+pDVU+9VK2qyDibxNSxLXtRSxH/Xjm8 + gqBSLz7M7Ixn1l5FjIEsYMpA1JxEY1V8VX7d3N2/PP7czp9f39QwmTzJ5UNqqb52M+gFhZnNUdBO + dSFMYxWSNHpDC4ecMLj2x8N0NJqM+llLNKZAFWSVpTg1cSO1jAfJII2TcdyfbNW1kQI9TNl7xBhj + q/YMOXWB3zBlSW+HNOg9rxCm+yHGwBkVEODeS09cE/QOpDCaULfRu7DDcuF5iKYXSm3x9f4eZSrr + zMxv+T1eSi19nTvk3ujg6clYaNl1xNhH22dxFBGsM42lnMwn6mCY9Td2cNjigdxWBTLE1RnNkVle + IHGpfGcdILiosTgxZAz4opCmQ0SdyqdZznlvaktd/cf+QAiBlrDIrcNCirN9W/Pwxf4a26+4DQx+ + 6QmbvJS6Qmed3DxwafNhxtNEZJc8gWgd/QIAAP//AwCjNKe67gIAAA== headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdedda0ceb36-SJC + - 8f425bbaab9a236e-SJC Connection: - keep-alive Content-Encoding: @@ -66,7 +64,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:31 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -80,25 +78,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "259" + - "231" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999845" + - "149999888" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_223a9415a5a19029f86768ffbabf3d6f + - req_427dff29f2a632ec0882c27c797f5d5a status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-correct-option].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-correct-option].yaml index 7f73abaa..f126cb68 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-correct-option].yaml +++ 
b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-correct-option].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - my office''s zip code?\n\nOptions:\nA) -8\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 94106\nE) 94107\n\nProposed Answer: the answer - is 94107\n\nSingle Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-8\nInsufficient information + to answer this question\ncheesecake\n94106\n94107\n\nProposed answer: the answer + is 94107", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "536" + - "464" content-type: - application/json host: @@ -46,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJLb4MwEITv/ArLZ6ggoXlwyyGKcql6Sx+qkLEXcGtsyzZSoyj/vTKQ - QNRU6oXDfDvD7MIpQAhzhjOEaU0cbbSINuXb02Z73L287ndH1ias2C8k3a0OB1Y949A7VPEJ1F1c - D1Q1WoDjSvaYGiAOfGqynKfpMl6u0g40ioHwtkq7KFXRLJ6lUbyK4sVgrBWnYHGG3gOEEDp1T19R - MvjGGYrDi9KAtaQCnF2HEMJGCa9gYi23jkiHwxFSJR3IrvV2qhsoW0t8LdkKMejn64uEqrRRhR34 - VS+55LbODRCrpA+1Tmnc0XOA0Ee3UHvTEWujGu1yp75A+sAkeezz8HjCCR2YU46IqWkR3onLGTjC - hZ1cBFNCa2CjdTwfaRlXExBMlv5d5l52vziX1X/iR0ApaAcs1wYYp7cLj2MG/A/219j1yF1hbI/W - QZOXXFZgtOH9Ny51vi5JQdZlPF/h4Bz8AAAA//8DAKuPA4PsAgAA + H4sIAAAAAAAAAwAAAP//jJI/b8MgEMV3fwrEbFf+FznxllSpOlTKVHWoKovA2abFgACrSaN89won + jR01lbow3O/e493BIUAIc4ZLhGlLHO20iJb15/3DelPvN7De7lY6fukfnxl7ylaL5RcOvUJt34G6 + H9UdVZ0W4LiSJ0wNEAfeNSmyfDabz5LFADrFQHhZo12Uq6jjkkdpnOZRXETJ/KxuFadgcYleA4QQ + OgynzykZ7HCJ4vCn0oG1pAFcXpoQwkYJX8HEWm4dkQ6HI6RKOpBD9EWexMWUGah7S3w+2Qtxrh8v + lwnVaKO29swv9ZpLbtvKALFKemPrlMYDPQYIvQ1D9Vc5sTaq065y6gOkN1wkJzs8rnKE6Zk55YiY + aLLwhlnFwBEu7GQnmBLaAhuV4wJJz7iagGAy8u8st7xPY3PZ/Md+BJSCdsAqbYBxej3v2GbA/7O/ + 2i4rHgJju7cOuqrmsgGjDT+9cq0rFqdsliXbvMDBMfgGAAD//wMAitN9t/MCAAA= headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdc06b78cf26-SJC + - 8f425bb60cfe17e4-SJC Connection: - keep-alive Content-Encoding: @@ -65,7 +64,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:24 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -79,25 +78,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "291" + - "538" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999868" + - "149999891" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_89e3d88c7f12861d7e774e452300b36d + - req_9bd9d799783ab13ef59ce8e5ca7fd25f status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-incorrect-option].yaml 
b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-incorrect-option].yaml index 45376f48..842cfbf0 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-incorrect-option].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-incorrect-option].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - my office''s zip code?\n\nOptions:\nA) -8\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 94106\nE) 94107\n\nProposed Answer: the answer - is 94106\n\nSingle Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-8\nInsufficient information + to answer this question\ncheesecake\n94106\n94107\n\nProposed answer: the answer + is 94106", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "536" + - "464" content-type: - application/json host: @@ -46,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAA4ySX2uDMBTF3/0UIc912M7WzreVjv1hDPo06BiSJlfNFpMsibBS+t1H1KplHezF - h/O753ju1UOAEOYMpwjTkjhaaRHe5tuX1erpfsv3z8s4ed1sHvL1191+UT9WDk+8Q+0+gLqT64qq - SgtwXMkWUwPEgU+dJtdxnETJct6ASjEQ3lZoF8YqnEWzOIyWYbTojKXiFCxO0VuAEEKH5ukrSgbf - OEXR5KRUYC0pAKf9EELYKOEVTKzl1hHZ1u0gVdKBbFqvx7qBvLbE15K1EJ1+7F8kVKGN2tmO93rO - JbdlZoBYJX2odUrjhh4DhN6bheqzjlgbVWmXOfUJ0gdOp/M2Dw8nHNGOOeWIGJsWkwtxGQNHuLCj - i2BKaAlssA7nIzXjagSC0dK/y1zKbhfnsvhP/AAoBe2AZdoA4/R84WHMgP/B/hrrj9wUxnZvHVRZ - zmUBRhvefuNcZyS5YUtG6DTHwTH4AQAA//8DAK9WW8vsAgAA + H4sIAAAAAAAAA4ySPW/CMBCG9/wKyzOpEggfYauqbgwsnaoqMs4lmDo+y74IWsR/rxw+ElQqdfFw + z72v3zv7GDHGVcmXjMutINlYHT9X+5dXO1mtdEbYzla7w/z7sH5bi30+rfkoKHCzA0lX1ZPExmog + heaMpQNBEFzT+SSbThfTdNGBBkvQQVZbijOMG2VUPE7GWZzM43RxUW9RSfB8yd4jxhg7dmfIaUo4 + 8CVLRtdKA96LGvjy1sQYd6hDhQvvlSdhiI96KNEQmC56nqXJbMgcVK0XIZ9ptb7UT7fLNNbW4cZf + +K1eKaP8tnAgPJpg7Akt7+gpYuyjG6q9y8mtw8ZSQfgJJhjm6dmO96vs4fjCCEnogWYyemBWlEBC + aT/YCZdCbqHslf0CRVsqHIBoMPLvLI+8z2MrU//HvgdSgiUoC+ugVPJ+3r7NQfhnf7XdVtwF5v7L + EzRFpUwNzjp1fuXKFpNcZInMZyLh0Sn6AQAA//8DAL5Pl0/zAgAA headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdcb6d3b645e-SJC + - 8f425bb64ed0fa36-SJC Connection: - keep-alive Content-Encoding: @@ -65,7 +64,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:26 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -79,25 +78,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "282" + - "247" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999868" + - "149999891" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_0f4462cd5dd31fe3e1a9d6847e563042 + - 
req_eb9ad02601ae4b1b2b579657ed9a7bef status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-several-options].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-several-options].yaml index abe7094e..26df9d56 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-several-options].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-several-options].yaml @@ -1,13 +1,13 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - my office''s zip code?\n\nOptions:\nA) -8\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 94106\nE) 94107\n\nProposed Answer: the answer - is 94106 or 94107\n\nSingle Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-8\nInsufficient information + to answer this question\ncheesecake\n94106\n94107\n\nProposed answer: the answer + is 94106 or 94107", "role": "user"}], "model": "gpt-4o-mini", "temperature": + 0}' headers: accept: - application/json @@ -16,7 +16,7 @@ interactions: connection: - keep-alive content-length: - - "545" + - "473" content-type: - application/json host: @@ -36,7 +36,7 @@ interactions: x-stainless-raw-response: - "true" x-stainless-retry-count: - - "1" + - "0" x-stainless-runtime: - CPython x-stainless-runtime-version: @@ -46,18 +46,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jFJBS8MwGL33V4ScV+m2sm69CaLTgyjIDhMpWfK1zZYmIUnFMfbfJW3X - dqjgJYf3vvfyvpecAoQwZzhFmJbE0UqL8DbfPt8V+4U8bO6PL0/rTUnWD8XrVr59PkZ44hVqtwfq - LqobqiotwHElW5oaIA686zSZx3ESJcukISrFQHhZoV0Yq3AWzeIwWobRohOWilOwOEXvAUIInZrT - R5QMvnCKoskFqcBaUgBO+yGEsFHCI5hYy60j0uHJQFIlHcgmdTTGDeS1JT6WrIXo8HN/kVCFNmpn - O77Hcy65LTMDxCrpTa1TGjfsOUDoo1movsqItVGVdplTB5DecDpdtX54qHDEdpxTjogRPOtauLbL - GDjChR01gimhJbBBOtRHasbViAhGS/8M85t3uziXxX/sB4JS0A5Ypg0wTq8XHsYM+A/211hfchMY - 26N1UGU5lwUYbXj7xrnOVjnZkVUezZc4OAffAAAA//8DAAUNxI3sAgAA + H4sIAAAAAAAAAwAAAP//jFLLbsIwELznK6w9kyoJz3Jrq35ARakqVVVk7E0wdWzLXkQrxL9XDhBA + UKkXH2Z2xjNrbxPGQEmYMhBLTqJxOn2oNk/PzWJFr+KlmM03j/NZ8TbavMt+UVfQiwq7WKGgo+pO + 2MZpJGXNnhYeOWF0zcf9wXA4GeaTlmisRB1ltaN0YNNGGZUWWTFIs3GaTw7qpVUCA0zZR8IYY9v2 + jDmNxG+Ysqx3RBoMgdcI026IMfBWRwR4CCoQNwS9EymsITRt9HPYY7UOPEYza60P+K67R9vaebsI + B77DK2VUWJYeebAmegayDlp2lzD22fZZX0QE523jqCT7hSYa3g/3dnDa4ok8VAWyxPUNzYVZKZG4 + 0uFsHSC4WKK8MmQM+Foqe0YkZ5Wvs9zy3tdWpv6P/YkQAh2hLJ1HqcTNvq15/GJ/jXUrbgND+AmE + TVkpU6N3Xu0fuHLlqBJ5hnmGC0h2yS8AAAD//wMALlTCsO4CAAA= headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdd6bc71fa2e-SJC + - 8f425bb169ea22f6-SJC Connection: - keep-alive Content-Encoding: @@ -65,9 +65,15 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:27 GMT + - Wed, 18 Dec 2024 21:48:38 GMT Server: - cloudflare + Set-Cookie: + - __cf_bm=d8n1B6AzFA1xougxyBgoPLD0ITgb.iimKMM9kNYr6NA-1734558518-1.0.1.1-c8MRCOD4wNoPcANGb9a6gOWsl6NhHqx911Ktp.RARxFa..7XVR9hKaZVQ2nRa8g.bTL2e2pT7EpsuMaFLlx6Sw; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; 
HttpOnly; + Secure; SameSite=None + - _cfuvid=DPEKvT7hx6XvGnKxQqNrPq5Y4dSqkyQo4hPKRlWd79E-1734558518261-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -79,25 +85,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "249" + - "168" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999865" + - "149999888" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_72f1a2af642dad2884e52d652e775182 + - req_becb26d30d1adf2d410f311a4664a6b2 status: code: 200 message: OK diff --git a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-unsure-option].yaml b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-unsure-option].yaml index 607cd8a2..5b56af9b 100644 --- a/tests/cassettes/TestLitQAEvaluation.test_grade[matched-unsure-option].yaml +++ b/tests/cassettes/TestLitQAEvaluation.test_grade[matched-unsure-option].yaml @@ -1,13 +1,12 @@ interactions: - request: body: - '{"messages": [{"content": "Given the following question and a proposed - answer to the question, return the single-letter choice in the question that - matches the proposed answer. If the proposed answer is blank or an empty string, - or multiple options are matched, respond with ''0''.\n\nQuestion: Q: What is - my office''s zip code?\n\nOptions:\nA) -8\nB) Insufficient information to answer - this question\nC) cheesecake\nD) 94106\nE) 94107\n\nProposed Answer: Insufficient - information\n\nSingle Letter Answer:", "role": "user"}], "model": "gpt-4o"}' + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. 
If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\n-8\nInsufficient information + to answer this question\ncheesecake\n94106\n94107\n\nProposed answer: Insufficient + information", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "541" + - "469" content-type: - application/json host: @@ -46,18 +45,19 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJLb4MwEITv/ArLZ6gIoDy4pS+lPeRWVWpVIWMWcGNsyzZSqyj/vTIQ - IGoq9cJhvp1hduHoIYRZgVOEaU0sbRQPtuXb/u4pP7w0z69kt3+I6221q/fN/SOJNPadQ+afQO3Z - dUNlozhYJkWPqQZiwaUuVnGSrMLVetmBRhbAna1SNkhkEIVREoTrIFwOxloyCgan6N1DCKFj93QV - RQFfOEWhf1YaMIZUgNNxCCGsJXcKJsYwY4mw2J8glcKC6FrfznUNZWuIqyVazgf9NL6Iy0ppmZuB - j3rJBDN1poEYKVyosVLhjp48hD66hdqLjlhp2SibWXkA4QIXi6jPw9MJZ3RgVlrC56bYvxKXFWAJ - 42Z2EUwJraGYrNP5SFswOQPebOnfZa5l94szUf0nfgKUgrJQZEpDwejlwtOYBveD/TU2HrkrjM23 - sdBkJRMVaKVZ/41LlW1KkpNNGcZr7J28HwAAAP//AwBPQ8gX7AIAAA== + H4sIAAAAAAAAAwAAAP//jFJNj9MwEL3nV1g+NygpLc32tnwcOC2CA0gIRa4zTgZsj9eeaIFV/zty + 2iZdsUhcfHhv3vN7o3kshJDYyb2QelCsXbDlrXl48y58+njv4i3s3n7+cPf6d1g3d1XT0Be5ygo6 + fAfNF9ULTS5YYCR/onUExZBd693LzXbbbOtmIhx1YLOsD1xuqHTosVxX601Z7cq6OasHQg1J7sXX + QgghHqc35/Qd/JR7Ua0uiIOUVA9yPw8JISPZjEiVEiZWnuVqITV5Bj9Ff+/TaAxqBM8CvaHoVO4g + mITy6QGi4AGTuB8hzd0uf4AZk8pV/GjtGT/OuSz1IdIhnfkZN+gxDW0ElcjnDIkpyIk9FkJ8m/qP + TyrJEMkFbpl+gM+GTXOyk8vWF3J35phY2QW+2a6eMWs7YIU2Xa1PaqUH6Bblsms1dkhXRHFV+e8s + z3mfaqPv/8d+IbSGwNC1IUKH+mnfZSxCPsl/jc0rngLL9CsxuNag7yGGiKeDMKF9ZXRdQV3BQRbH + 4g8AAAD//wMAIVEMVh4DAAA= headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdd11ed0175e-SJC + - 8f425bb5cdb77ac1-SJC Connection: - keep-alive Content-Encoding: @@ -65,7 +65,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:27 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -79,25 +79,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "196" + - "262" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999867" + - "149999890" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_9dd0f40823dceb910336a862f0513c68 + - req_ca5799089a4ca130483ac0a6fa172710 status: code: 200 message: OK diff --git a/tests/cassettes/test_eval_answer[llm basic].yaml b/tests/cassettes/test_eval_answer[llm basic].yaml index 63f1bb18..18f9bfd9 100644 --- a/tests/cassettes/test_eval_answer[llm basic].yaml +++ b/tests/cassettes/test_eval_answer[llm basic].yaml @@ -7,7 +7,7 @@ interactions: other output is permitted.\n\nQuestion: Which of the following is most likely true:\n\nA) Piggie, B) Pigeon, C) Gerald\n\n\nCorrect answer: C\n\nProposed answer: Based on all factors considered, the most compelling answer is Gerald, - C", "role": "user"}], "model": "gpt-4o", "temperature": 0}' + C", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' headers: accept: - application/json @@ -16,7 +16,7 @@ interactions: connection: - keep-alive content-length: - - "516" + - "521" content-type: - application/json host: @@ -46,18 +46,16 @@ interactions: response: body: string: !!binary | - 
H4sIAAAAAAAAAwAAAP//jJI/b8IwEMX3fArLM6kCpPzbGDpQVahSh4pWVWTsS+Li+CzbkaCI7145 - AZKqVOri4X73nt+dfYwIoVLQBaG8ZJ5XRsXL/G09e3zePyX1YbxZrhzuynKTi9WXW7/SQVDg9hO4 - v6juOFZGgZeoW8wtMA/BdTgdp+k0mc5GDahQgAqywvg4xXiUjNI4mcXJ5CwsUXJwdEHeI0IIOTZn - iKgF7OmCJINLpQLnWAF0cW0ihFpUoUKZc9J5pj0ddJCj9qCb1JuHlz6xkNeOhWC6VupcP12vUlgY - i1t35td6LrV0ZWaBOdTB1nk0tKGniJCPZqT6R0pqLFbGZx53oIPhMLlv/Wi3xB49M4+eqb5oMrhh - lwnwTCrX2wnljJcgOmm3QFYLiT0Q9Yb+HeaWdzu41MV/7DvAORgPIjMWhOQ/B+7aLIQv9lfbdclN - YOoOzkOV5VIXYI2V7SvnJpvnbMvmeTKe0egUfQMAAP//AwAWS34s7gIAAA== + H4sIAAAAAAAAA4ySy2rDMBBF9/4KoXVcnMTOa1dKCl00lAZKHxSjSGNbjSwJaUJTSv69yHnYoSl0 + o8WcuVd3RvqOCKFS0BmhvGLIa6vi6+LzZr54vh2tB/Xr0zgTi8Lh/ZY93vHlA+0FhVl9AMej6oqb + 2ipAafQecwcMIbj2x8M0yyZZf9KA2ghQQVZajFMT11LLeJAM0jgZx/3JQV0ZycHTGXmLCCHkuzlD + Ti1gS2ck6R0rNXjPSqCzUxMh1BkVKpR5Lz0yjbTXQm40gm6iv8yXXeKg2HgW0umNUof67nSVMqV1 + ZuUP/FQvpJa+yh0wb3Sw9WgsbeguIuS9GWlzlpJaZ2qLOZo16GDYT7K9H2032aEHhgaZ6opGvQt2 + uQBkUvnOTihnvALRStsFso2QpgOiztC/w1zy3g8udfkf+xZwDhZB5NaBkPx84LbNQfhnf7WdltwE + pv7LI9R5IXUJzjq5f+XC5sMpSxM+HbGERrvoBwAA//8DAJN7IxXzAgAA headers: - CF-Cache-Status: - - DYNAMIC CF-RAY: - - 8f39fdb5cae1158a-SJC + - 8f425bb118049453-SJC Connection: - keep-alive Content-Encoding: @@ -65,14 +63,14 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:22 GMT + - Wed, 18 Dec 2024 21:48:38 GMT Server: - cloudflare Set-Cookie: - - __cf_bm=lVkT7i5qloNOJW3VW5kf8Ohm6U080WiPUv6XirXCoFk-1734470782-1.0.1.1-nAgxt2GizSWkF.auEc_j1tv3Erjbd74Lsh9WJmMaZa_E8fpVuEZ8SsBIqLBHICQDV0sfwSjHgP9mTBHQujl_XA; - path=/; expires=Tue, 17-Dec-24 21:56:22 GMT; domain=.api.openai.com; HttpOnly; + - __cf_bm=shlFi0WrRQqtHm9BFHA8BA_DE3OgD.WLNX_BG0MJ.Uc-1734558518-1.0.1.1-dTPiGPfeRXm4eFyNx5Qhh98ITpHISNJJ15gnJl7VfBbOzj3CoF.H.Mssss_WvoWjPSiaq4ZWwBKCF16.mbMFig; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - - _cfuvid=YCWb3aZdtzEmsWTuiPgC.gchnL7jvJLEWh9yvJqAiAw-1734470782603-0.0.1.1-604800000; + - _cfuvid=LbfayFWmgFkPH4gOfhOfLicD7koAa3IqwrVpt0Q2uQ0-1734558518270-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked @@ -82,28 +80,30 @@ interactions: - X-Request-ID alt-svc: - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC openai-organization: - future-house-xr4tdh openai-processing-ms: - - "124" + - "226" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29998" x-ratelimit-remaining-tokens: - - "29999876" + - "149999877" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_84a9ec4746765b74e4d84610ebc880ad + - req_c627f8d13c1969c6fd3a26f94a43a44f status: code: 200 message: OK diff --git a/tests/cassettes/test_eval_llm_config.yaml b/tests/cassettes/test_eval_llm_config.yaml index 383479dc..7268d855 100644 --- a/tests/cassettes/test_eval_llm_config.yaml +++ b/tests/cassettes/test_eval_llm_config.yaml @@ -5,7 +5,7 @@ interactions: question, and a proposed answer to the question. Please tell me if the proposed answer is correct, given the correct answer. ONLY SAY ''YES'' OR ''NO''. 
No other output is permitted.\n\nQuestion: What is 25 * 10?\n\nCorrect answer: - 250\n\nProposed answer: 250", "role": "user"}], "model": "gpt-4o", "temperature": + 250\n\nProposed answer: 250", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0.5}' headers: accept: @@ -15,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - "387" + - "392" content-type: - application/json host: @@ -45,18 +45,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJLNasMwEITvfgqhc1ycH+okt0Ja6KUthBzaUowirW2lslZIG0gJefci - x4kT2kIvPsy3M55de58wxrXic8ZlLUg2zqR35dvTLF+8PJgRPhLUrtwszWKVPa92M+CD6MD1BiSd - XDcSG2eANNojlh4EQUwd5uPJJM/y6bgFDSow0VY5SieYjrLRJM2maXbbGWvUEgKfs/eEMcb27TNW - tAp2fM6ywUlpIARRAZ+fhxjjHk1UuAhBBxKW+KCHEi2BbVu/3i8viYdyG0QsZrfGdPrh/CqDlfO4 - Dh0/66W2OtSFBxHQxthA6HhLDwljH+1K26uW3HlsHBWEn2BjYD47xvH+hj0cdoyQhOnlaXeF67BC - AQltwsVFuBSyBtU7+/OJrdJ4AZKLlX92+S37uLa21X/ieyAlOAJVOA9Ky+t9+zEP8Qf7a+x84rYw - D1+BoClKbSvwzuvjNy5dIfKZmiohhyVPDsk3AAAA//8DADQLsKzsAgAA + H4sIAAAAAAAAAwAAAP//jJJfa4MwFMXf/RQhz3Vo/8zWt1H60LExaAdjjCFpctVsMQlJpCul333E + WrVsg734cH73HM+9egwQwpzhFGFaEkcrLcK7fL9cMbp+vF/L/fLpsH3eVGabLDb5w+QFj7xD7T6A + uovrhqpKC3BcyTOmBogDnxonk+lsNp/F8wZUioHwtkK7cKrCiksejqPxNIySMJ637lJxChan6C1A + CKFj8/Q9JYMvnKJodFEqsJYUgNNuCCFslPAKJtZy64h0eNRDqqQD2VR/XW2HxEBeW+LbyVqIVj91 + rxKq0EbtbMs7PeeS2zIzQKySPtY6pXFDTwFC781K9VVLrI2qtMuc+gTpA5PFOQ73h+xh3DKnHBG9 + PG+vcB2WMXCECzu4CKaElsB6Z38+UjOuBiAYrPyzy2/Z57W5LP4T3wNKQTtgmTbAOL3etx8z4P+y + v8a6EzeFsT1YB1WWc1mA0Yafv3Gus9ucxhHEEexwcAq+AQAA//8DAPSiOYXxAgAA headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8f39fdbac9db643b-SJC + - 8f425bb65e7c6453-SJC Connection: - keep-alive Content-Encoding: @@ -64,7 +64,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 17 Dec 2024 21:26:23 GMT + - Wed, 18 Dec 2024 21:48:39 GMT Server: - cloudflare Transfer-Encoding: @@ -78,25 +78,25 @@ interactions: openai-organization: - future-house-xr4tdh openai-processing-ms: - - "231" + - "229" openai-version: - "2020-10-01" strict-transport-security: - max-age=31536000; includeSubDomains; preload x-ratelimit-limit-requests: - - "10000" + - "30000" x-ratelimit-limit-tokens: - - "30000000" + - "150000000" x-ratelimit-remaining-requests: - - "9999" + - "29999" x-ratelimit-remaining-tokens: - - "29999909" + - "149999909" x-ratelimit-reset-requests: - - 6ms + - 2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_6538a77713d1ec9b61a8e15f3cf37377 + - req_1f8e0a7bd96417a061e010db00f50b6f status: code: 200 message: OK diff --git a/tests/cassettes/test_extract_answer[complex].yaml b/tests/cassettes/test_extract_answer[complex].yaml new file mode 100644 index 00000000..8b076955 --- /dev/null +++ b/tests/cassettes/test_extract_answer[complex].yaml @@ -0,0 +1,109 @@ +interactions: + - request: + body: + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\nEconomic factors\nSocial + unrest\nPolitical corruption\n\nProposed answer: Based on the context given, + Serif et al. (2026) claim that the overwhelming cause of regime collapse arises + from economic factors. Yet, most other scholars (Gerald and Robinson for example) + believe the collapse was due to social unrest because of the prolonged epidemic + of 2025. I tend to agree with the majority - although I can see both sides. 
+ Thus my response is that the social unrest was the significant factor in the + collapse of the regime.", "role": "user"}], "model": "gpt-4o-mini", "temperature": + 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "866" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.4 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "1" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJLNasMwEITvfgqhc1zsNE5S30qhhzZQaKE5lGIUaW2rkbVCkklDyLsX + OT92aAq96LDfzmh2pV1ECJWC5oTymnneGBXfl5uHx4VKt7PNy3KdLerpUj8/vavXxtkNHQUFrr6A + +5PqhmNjFHiJ+oC5BeYhuKaz20mWzbP0rgMNClBBVhkfTzBupJbxOBlP4mQWp/OjukbJwdGcfESE + ELLrzpBTC/imOUlGp0oDzrEKaH5uIoRaVKFCmXPSeaY9HfWQo/agu+hvyCVTpNUW3EWPhbJ1LOTU + rVLH+v58qcLKWFy5Iz/XS6mlqwsLzKEOFziPhnZ0HxHy2Q3XXuSlxmJjfOFxDToYptPs4Ef7nfZ0 + fGQePVND0Wx0xa4Q4JlUbrAdyhmvQfTSfpWsFRIHIBoM/TvMNe/D4FJX/7HvAedgPIjCWBCSXw7c + t1kIP+6vtvOSu8DUbZ2HpiilrsAaKw/vXZpiWvI0gTSBFY320Q8AAAD//wMA8VLBff0CAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8f425bb69fe5250c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 18 Dec 2024 21:48:39 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "491" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999790" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_0446ed4c188b77427f33f74f91e0d112 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_extract_answer[empty-proposal].yaml b/tests/cassettes/test_extract_answer[empty-proposal].yaml new file mode 100644 index 00000000..576fc15e --- /dev/null +++ b/tests/cassettes/test_extract_answer[empty-proposal].yaml @@ -0,0 +1,108 @@ +interactions: + - request: + body: + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. 
If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\nA\nB\nC\n\nProposed answer: + ", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "374" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.4 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFJNS8QwFLz3V4R33kq72/3qTUQ8CSLiRaSkyWsbTZOQpKgs+98l3d22 + y67gJYeZN5OZl+wiQkBwyAmwhnrWGhnfVl939zxzDy+CCfO6KB+TLadl6rbPTyuYBYUuP5D5k+qG + 6dZI9EKrA80sUo/BNV0vsuVys0w3PdFqjjLIauPjTMetUCKeJ/MsTtZxujmqGy0YOsjJW0QIIbv+ + DDkVx2/ISTI7IS06R2uEfBgiBKyWAQHqnHCeKg+zkWRaeVR99ClsseocDdFUJ+UR3w/3SF0bq0t3 + 5Ae8Ekq4prBInVbB03ltoGf3ESHvfZ/uLCIYq1vjC68/UQXDdXqwg3GLI3msCl57Kq9ozswKjp4K + 6SbrAEZZg/zCkBCgHRd6QkSTypdZrnkfagtV/8d+JBhD45EXxiIX7Grf3jx8sb/GhhX3gcH9OI9t + UQlVozVWHB64MsWqYmmCaYIlRPvoFwAA//8DACbc6TvuAgAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8f425bb11ee3eb30-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 18 Dec 2024 21:48:38 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=3EbR6c_9nmNeI58TWDLCiyFbbzWnxiCAQfgz1Ou5oXQ-1734558518-1.0.1.1-_OVXY1MiEfz9j5Sl02ocx_beYJRhzMj_5kdzhk9Gq_NIORYBNM4OqmSmTCUwNu.EObKQiWZdQdrwqZ84sr8.cQ; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=U.00GXQIFA3gE8IldpDjXxcp1niJXAkehSRhHT85pWs-1734558518279-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "249" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29998" + x-ratelimit-remaining-tokens: + - "149999913" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_63d20bd456f7f2145bc66a3ae269bc1e + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_extract_answer[gave-two].yaml b/tests/cassettes/test_extract_answer[gave-two].yaml new file mode 100644 index 00000000..a529a885 --- /dev/null +++ b/tests/cassettes/test_extract_answer[gave-two].yaml @@ -0,0 +1,108 @@ +interactions: + - request: + body: + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. 
If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\nA\nB\nC\n\nProposed answer: + A or B", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "380" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.4 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFJNTwIxFLzvr2jemTW7fAhyI8TozUSjiRizKd23S7Xb17QPlRD+u+mC + gAETLz3MvJnOvHadCAG6hLEAtZCsGmfSSfU5vX6YXd2E2YRGt4Mp493qqZp/PD7jPXSiguZvqPhH + daGocQZZk93SyqNkjK75sNcfDEaDfNQSDZVooqx2nPYpbbTVaTfr9tNsmOajnXpBWmGAsXhJhBBi + 3Z4xpy3xC8Yi6/wgDYYga4TxfkgI8GQiAjIEHVhahs6BVGQZbRv9GPZYLYOM0ezSmB2+2d9jqHae + 5mHH7/FKWx0WhUcZyEbPwOSgZTeJEK9tn+WviOA8NY4Lpne00XDY29rBYYsHclcVmFiaM5pfZkWJ + LLUJR+sAJdUCyxNDIUAuS01HRHJU+TTLOe9tbW3r/9gfCKXQMZaF81hqdbZvax6/2F9j+xW3gSGs + AmNTVNrW6J3X2weuXHFZqTzDPMM5JJvkGwAA//8DAOjXCFXuAgAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8f425bb11e1069a2-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 18 Dec 2024 21:48:38 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=eAk9PjLOP_uC98HrFuiPUxdGbMOD0FndASetRInyC8E-1734558518-1.0.1.1-czBHIlZrAXhRtJiNtQMJ4FNObmpYfP0sPzRSb84VB2iiFfmBNMFsZOSzB8kN5BWGvHDUXsKgWJTphYPTQzM3FA; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=dYXAYAvcpEWoKaqCouzZ9rcGFRQEzhYA4XzFKsQi83I-1734558518200-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "171" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999912" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_efdaa27fda18e26d87bcadcc80237c76 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_extract_answer[not in options].yaml b/tests/cassettes/test_extract_answer[not in options].yaml new file mode 100644 index 00000000..70884d60 --- /dev/null +++ b/tests/cassettes/test_extract_answer[not in options].yaml @@ -0,0 +1,108 @@ +interactions: + - request: + body: + '{"messages": [{"content": "You are evaluating answers for a test which + has fixed options. Repeat back which option the proposed answer matches. GIVE + ONLY THE VERBATIM TEXT OF A FIXED OPTION. 
If the proposed answer is empty, invalid, + or ambiguous, return an empty string.\n\nOptions:\nB\nC\n\nProposed answer: + F", "role": "user"}], "model": "gpt-4o-mini", "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "372" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.4 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLLTsMwELznK6w9NyjpM/QGiAsSEuICEkKR62wSg2Nb9rY8qv47chLa + VC0SFx9mdsYza28jxkAWsGQgak6isSq+Kj9ubt0dyflYyIfy8b7mT5ss3Txff6sJjILCrN5Q0K/q + QpjGKiRpdEcLh5wwuKaLyXQ2y2Zp1hKNKVAFWWUpnpq4kVrG42Q8jZNFnGa9ujZSoIcle4kYY2zb + niGnLvATliwZ/SINes8rhOV+iDFwRgUEuPfSE9cEowMpjCbUbfQh7LBcex6i6bVSPb7b36NMZZ1Z + +Z7f46XU0te5Q+6NDp6ejIWW3UWMvbZ91kcRwTrTWMrJvKMOhvPLzg4OWzyQfVUgQ1yd0RyZ5QUS + l8oP1gGCixqLE0PGgK8LaQZENKh8muWcd1db6uo/9gdCCLSERW4dFlKc7duahy/219h+xW1g8F+e + sMlLqSt01snugUubz0uRJpgmuIJoF/0AAAD//wMAUYws+e4CAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8f425bb11ca22513-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 18 Dec 2024 21:48:38 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=ABTDwd4t79cPLIko1hPlFoZXxUQ6rzPq8jHwq1Xy7XE-1734558518-1.0.1.1-Qqt3v2jz7xPx17Fx0ehWguxbmaMuZk4B3NM4Z1HW2aMmaaTMq2RvfX.y5A9X5qv4xoO0qWDJdyM.E9ahp.RW5A; + path=/; expires=Wed, 18-Dec-24 22:18:38 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=17oj8YL1hlYLaR7o.N8HEjWKDALCyYtBfmHe30jFAG0-1734558518262-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "232" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29996" + x-ratelimit-remaining-tokens: + - "149992191" + x-ratelimit-reset-requests: + - 7ms + x-ratelimit-reset-tokens: + - 3ms + x-request-id: + - req_e11e29110308fec0a5310bf18d49c27d + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_utils.py b/tests/test_utils.py index 3469f01c..0962e24c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,7 @@ import pytest -from aviary.core import eval_answer +from aviary.core import eval_answer, extract_answer from aviary.utils import MultipleChoiceEvaluation, MultipleChoiceQuestion from tests.conftest import VCR_DEFAULT_MATCH_ON @@ -39,6 +39,35 @@ async def test_eval_answer( assert await eval_answer(proposed, correct, question, eval_mode) == expected +@pytest.mark.vcr +@pytest.mark.parametrize( + ("proposed_answer", "options", "expected"), + [ + pytest.param("A", ["A", "B", "C"], "A", id="exact-uppercase"), + pytest.param("a", ["A", "B", "C"], "A", id="exact-lowercase"), + pytest.param("F", ["B", "C"], None, id="not in 
options"), + pytest.param("A or B", ["A", "B", "C"], None, id="gave-two"), + pytest.param( + "Based on the context given, Serif et al. (2026) claim that " + "the overwhelming cause of regime collapse arises from economic factors. " + "Yet, most other scholars (Gerald and Robinson for example) believe the collapse " + "was due to social unrest because of the prolonged epidemic of 2025. I tend to agree " + "with the majority - although I can see both sides. Thus my response " + "is that the social unrest was the significant factor in the collapse of the regime.", + ["Economic factors", "Social unrest", "Political corruption"], + "Social unrest", + id="complex", + ), + pytest.param("", ["A", "B", "C"], None, id="empty-proposal"), + ], +) +@pytest.mark.asyncio +async def test_extract_answer( + proposed_answer: str, options: Sequence[str], expected: str | None +) -> None: + assert await extract_answer(proposed_answer, options) == expected + + @pytest.mark.vcr @pytest.mark.asyncio async def test_eval_llm_config(): @@ -108,7 +137,7 @@ def _assert_prompt_is_valid( *ZIP_CODE_QUESTION_IDEAL_DISTRACTORS, "the answer is 14004", MultipleChoiceEvaluation.INCORRECT, - "0", + None, id="didnt-match-and-no-llm-innate-knowledge", ), pytest.param( @@ -129,35 +158,35 @@ def _assert_prompt_is_valid( *ZIP_CODE_QUESTION_IDEAL_DISTRACTORS, "the answer is 94106 or 94107", MultipleChoiceEvaluation.INCORRECT, - "0", + None, id="matched-several-options", ), pytest.param( *ZIP_CODE_QUESTION_IDEAL_DISTRACTORS, "", MultipleChoiceEvaluation.INCORRECT, - "0", + None, id="empty-answer1", ), pytest.param( *MEANING_OF_LIFE_QUESTION_IDEAL_DISTRACTORS, "14", MultipleChoiceEvaluation.INCORRECT, - "0", + None, id="didnt-match-and-llm-has-innate-knowledge", ), pytest.param( *MEANING_OF_LIFE_QUESTION_IDEAL_DISTRACTORS, "", MultipleChoiceEvaluation.INCORRECT, - "0", + None, id="empty-answer2", ), pytest.param( *LITQA2_QUESTION_IDEAL_DISTRACTORS, "", MultipleChoiceEvaluation.INCORRECT, - "0", + None, id="empty-answer3", ), ], @@ -169,7 +198,7 @@ async def test_grade( distractors: str | list[str], actual_answer: str, expected_eval: MultipleChoiceEvaluation, - expected_extracted_answer: str, + expected_extracted_answer: str | None, ) -> None: """Tests that we can create a multiple choice question and evaluate answers.""" mc_question = MultipleChoiceQuestion( @@ -179,7 +208,7 @@ async def test_grade( shuffle_seed=42, # Seed for VCR cassette ) self._assert_prompt_is_valid(mc_question, question, ideal_answer, distractors) - evaluation, _, graded_answer = await mc_question.grade(actual_answer) + evaluation, graded_answer = await mc_question.grade(actual_answer) assert evaluation == expected_eval if evaluation == MultipleChoiceEvaluation.CORRECT: assert graded_answer == ideal_answer