diff --git a/ai_eval/__init__.py b/ai_eval/__init__.py index 63412c2..2ef3cb6 100644 --- a/ai_eval/__init__.py +++ b/ai_eval/__init__.py @@ -4,3 +4,4 @@ from .shortanswer import ShortAnswerAIEvalXBlock from .coding_ai_eval import CodingAIEvalXBlock +from .multiagent import MultiAgentAIEvalXBlock diff --git a/ai_eval/base.py b/ai_eval/base.py index 3e67e7f..e7f03af 100644 --- a/ai_eval/base.py +++ b/ai_eval/base.py @@ -4,11 +4,10 @@ from django.utils.translation import gettext_noop as _ from xblock.core import XBlock -from xblock.fields import String, Scope, Dict +from xblock.fields import String, Scope from xblock.validation import ValidationMessage - -from .llm import SupportedModels +from .llm import SupportedModels, get_llm_response try: from xblock.utils.studio_editable import StudioEditableXBlockMixin @@ -28,9 +27,6 @@ class AIEvalXBlock(StudioEditableXBlockMixin, XBlock): Base class for Xblocks with AI evaluation """ - USER_KEY = "USER" - LLM_KEY = "LLM" - loader = ResourceLoader(__name__) icon_class = "problem" @@ -59,37 +55,8 @@ class AIEvalXBlock(StudioEditableXBlockMixin, XBlock): default=SupportedModels.GPT4O.value, ) - evaluation_prompt = String( - display_name=_("Evaluation prompt"), - help=_( - "Enter the evaluation prompt given to the model." - " The question will be inserted right after it." - " The student's answer would then follow the question. Markdown format can be used." - ), - default="You are a teacher. Evaluate the student's answer for the following question:", - multiline_editor=True, - scope=Scope.settings, - ) - question = String( - display_name=_("Question"), - help=_( - "Enter the question you would like the students to answer." - " Markdown format can be used." - ), - default="", - multiline_editor=True, - scope=Scope.settings, - ) - - messages = Dict( - help=_("Dictionary with chat messages"), - scope=Scope.user_state, - default={USER_KEY: [], LLM_KEY: []}, - ) editable_fields = ( "display_name", - "evaluation_prompt", - "question", "model", "model_api_key", "model_api_url", @@ -138,9 +105,6 @@ def validate_field_data(self, validation, data): ) ) - if not data.question: - validation.add( - ValidationMessage( - ValidationMessage.ERROR, _("Question field is mandatory") - ) - ) + def get_llm_response(self, messages): + return get_llm_response(self.model, self.model_api_key, messages, + self.model_api_url) diff --git a/ai_eval/coding_ai_eval.py b/ai_eval/coding_ai_eval.py index 92f2658..c026b52 100644 --- a/ai_eval/coding_ai_eval.py +++ b/ai_eval/coding_ai_eval.py @@ -12,7 +12,6 @@ from xblock.fields import Dict, Scope, String from xblock.validation import ValidationMessage -from .llm import get_llm_response from .base import AIEvalXBlock from .utils import ( submit_code, @@ -62,13 +61,42 @@ class CodingAIEvalXBlock(AIEvalXBlock): default=LanguageLabels.Python, Scope=Scope.settings, ) + + evaluation_prompt = String( + display_name=_("Evaluation prompt"), + help=_( + "Enter the evaluation prompt given to the model." + " The question will be inserted right after it." + " The student's answer would then follow the question. Markdown format can be used." + ), + default="You are a teacher. Evaluate the student's answer for the following question:", + multiline_editor=True, + scope=Scope.settings, + ) + + question = String( + display_name=_("Question"), + help=_( + "Enter the question you would like the students to answer." + " Markdown format can be used." 
+ ), + default="", + multiline_editor=True, + scope=Scope.settings, + ) + messages = Dict( help=_("Dictionary with messages"), scope=Scope.user_state, default={USER_RESPONSE: "", AI_EVALUATION: "", CODE_EXEC_RESULT: {}}, ) - editable_fields = AIEvalXBlock.editable_fields + ("judge0_api_key", "language") + editable_fields = AIEvalXBlock.editable_fields + ( + "question", + "evaluation_prompt", + "judge0_api_key", + "language", + ) def resource_string(self, path): """Handy helper for getting resources from our kit.""" @@ -134,6 +162,13 @@ def validate_field_data(self, validation, data): super().validate_field_data(validation, data) + if not data.question: + validation.add( + ValidationMessage( + ValidationMessage.ERROR, _("Question field is mandatory") + ) + ) + if data.language != LanguageLabels.HTML_CSS and not data.judge0_api_key: validation.add( ValidationMessage( @@ -185,13 +220,7 @@ def get_response(self, data, suffix=""): # pylint: disable=unused-argument ] try: - response = get_llm_response( - self.model, - self.model_api_key, - messages, - self.model_api_url, - ) - + response = self.get_llm_response(messages) except Exception as e: traceback.print_exc() logger.error( diff --git a/ai_eval/multiagent.py b/ai_eval/multiagent.py new file mode 100644 index 0000000..475f9a6 --- /dev/null +++ b/ai_eval/multiagent.py @@ -0,0 +1,648 @@ +import itertools +import re +import textwrap +import typing + +import jinja2 +import pydantic +from django.utils.translation import gettext_noop as _ +from jinja2.sandbox import SandboxedEnvironment +from xblock.core import XBlock +from xblock.exceptions import JsonHandlerError +from xblock.fields import Boolean, Dict, List, Scope, String +from xblock.validation import ValidationMessage +from web_fragments.fragment import Fragment + +from .base import AIEvalXBlock +from .llm import SupportedModels + + +DEFAULT_SUPERVISOR_PROMPT = textwrap.dedent(""" + You are a supervisor managing an interaction between the following agents: Coach, Character. + Based on the conversation, decide which agent should respond next. + You can choose from: Character, Coach, or FINISH. + You are responsible for managing the flow of the conversation between agents. + The conversation should flow naturally with the Character until specific conditions are met. + Switch control to the Coach only under the following conditions: + (1) The learner makes the same mistake **three times in a row**. + (2) The learner **explicitly** asks for help. + (3) The learner gets **significantly off topic** and is no longer addressing the learning objectives or the project. + If the learner shows minor deviations or uncertainty, let the Character continue interacting with the learner. + And if the learner specifically asks for help, you should always call on the Coach. + Your goal is to provide enough opportunities for the learner to self-correct and progress naturally without premature intervention. + Call the conversation complete and choose FINISH only under the following conditions: + (1) The **learning objectives and evaluation criteria are fully met**, + (2) The learner explicitly indicates they are done with the conversation, or + (3) Progress **stalls** and it becomes evident that the learner cannot achieve the learning objectives after multiple attempts. + Always finish the conversation when the learner requests. + If the interaction is complete, choose 'FINISH'. 
+ Learning Objectives: {{ scenario_data.scenario.learning_objectives }} + Evaluation Criteria: {{ scenario_data.evaluation_criteria }} + + Who should act next? Do not give an explanation. Output exactly and only one option. + Choose from: ['Character', 'Coach', 'FINISH'] +""").strip() + + +DEFAULT_AGENT_PROMPT = textwrap.dedent(""" + You are {{ character_name }}. + In the given conversation, you are speaking to {{ user_character_name }}, who is described as: {{ user_character_data }}. + + {% if character_data %} + Personality details: {{ character_data.professional_summary }} + Key competencies: {{ character_data.key_competencies }} + Behavioral profile: {{ character_data.behavioral_profile }} + {% endif %} + + {% if role == "Coach" %} + Learning Objectives: {{ scenario_data.scenario.learning_objectives }} + Evaluation Criteria: {{ scenario_data.evaluation_criteria }} + {% endif %} + + Case Details: {{ scenario_data.scenario.case_details }}. + + {% for agent in scenario_data.agents %} + {% if role.lower() == agent.role %} + {{ agent.instructions }} + {% endif %} + {% endfor %} + Speak in a dialogue fashion, naturally and succinctly. + Do not do the work for the student. If the student tries to get you to answer the questions you are asking them to supply information on, redirect them to the task. + Do not present tables, lists, or detailed written explanations. For instance, do not say 'the main goals include: 1. ...' + Output only the text content of the next message from {{ character_name }}. +""").strip() + + +DEFAULT_EVALUATOR_PROMPT = textwrap.dedent(""" + You are an evaluator agent responsible for generating an evaluation report of the conversation after the conversation has concluded. + Use the provided chat history to evaluate the learner based on the evaluation criteria. + You are evaluating the user based on their input, not the reactions by the other characters (such as the main character or the coach). + **Important**: Your only job is to give an evaluation report in well-structured markdown. You are not to chat with the learner. Do not engage in any conversation or provide feedback directly to the user. Do not ask questions, give advice or encouragement, or continue the conversation. Your only job is to produce the evaluation report. + Your task is to produce a well-structured markdown report in the following format: + + # Evaluation Report + + {% for criterion in scenario_data.evaluation_criteria %} + ## {{ criterion.name }} + ### Score: (0-5)/5 + **Rationale**: Provide a rationale for the score, using specific direct quotes from the conversation as evidence. + {% endfor %} + + Your response must adhere to this exact structure, and each score must have a detailed rationale that includes at least one direct quote from the chat history. + If you cannot find a direct quote, mention this explicitly and provide an explanation. 
+""").strip() + + +DEFAULT_CONVERSATION_FORMAT = textwrap.dedent(""" + + {% for message in messages %} + + {{ message.agent }} + {{ message.name }} + {{ message.role }} + {{ message.content | escape }} + + {% endfor %} + +""") + + +class Scenario(pydantic.BaseModel): + title: str + initial_message: str + + +class ScenarioCharacters(pydantic.BaseModel): + user_character: str + + +class ScenarioData(pydantic.BaseModel): + scenario: Scenario + characters: ScenarioCharacters + + +class Character(pydantic.BaseModel): + name: str + role: str + + +class CharacterData(pydantic.BaseModel): + characters: typing.List[Character] + + +class MultiAgentAIEvalXBlock(AIEvalXBlock): + """ + + AI-powered XBlock for simulated conversations with + multiple agents and custom scenarios. + + """ + + _jinja_env = SandboxedEnvironment(undefined=jinja2.StrictUndefined) + + MAIN_CHARACTER_KEY = "main_character" + USER_CHARACTER_KEY = "user_character" + + display_name = String( + display_name=_("Display Name"), + help=_("Name of the component in the studio"), + default="Multi-agent AI Evaluation", + scope=Scope.settings, + ) + + supervisor_prompt = String( + display_name=_("Supervisor prompt"), + help=_( + 'Prompt used to instruct the model how to choose the next agent. ' + 'Instruct it to choose between one of the roles in "Role ' + 'characters" or the command specified in "Supervisor finish ' + 'command".' + ), + multiline_editor=True, + default=DEFAULT_SUPERVISOR_PROMPT, + scope=Scope.settings, + ) + + finish_command = String( + display_name=_("Supervisor finish command"), + help=_("Output from the Supervisor to be recognised as end of session"), + scope=Scope.settings, + default=_("FINISH"), + ) + + supervisor_prefill = String( + display_name=_("Prefill for supervisor reply"), + help=_("Prefill used to hint the model when acting as the Supervisor"), + scope=Scope.settings, + default=_("Choice: "), + ) + + role_characters = Dict( + display_name=_("Agent characters"), + help=_( + "Mapping of agents used by the Supervisor to character keys " + "in scenario data" + ), + scope=Scope.settings, + default={ + _("User"): USER_CHARACTER_KEY, + _("Character"): MAIN_CHARACTER_KEY, + _("Coach"): "coach", + }, + ) + + agent_prompt = String( + display_name=_("Agent prompt"), + help=_( + "Prompt used to instruct the model how to act as an agent. 
" + "Template variables available are: role, scenario_data, " + "character_data, character_name, user_character_data, " + "user_character_name" + ), + multiline_editor=True, + default=DEFAULT_AGENT_PROMPT, + scope=Scope.settings, + ) + + evaluator_prompt = String( + display_name=_("Evaluator prompt"), + help=_( + "Prompt used to instruct the model how to evaluate the learner" + ), + multiline_editor=True, + default=DEFAULT_EVALUATOR_PROMPT, + scope=Scope.settings, + ) + + conversation_format = String( + display_name=_("Conversation format template"), + help=_( + "Template used to format the conversation, appended to all prompts" + ), + multiline_editor=True, + default=DEFAULT_CONVERSATION_FORMAT, + scope=Scope.settings, + ) + + message_content_tag = String( + display_name=_("Message content tag"), + help=_("Tag for finding message content in the model's response"), + default="content", + scope=Scope.settings, + ) + + scenario_data = Dict( + scope=Scope.settings, + default={ + "scenario": { + "title": "", + "initial_message": "", + "case_details": "", + "learning_objectives": [], + }, + "evaluation_criteria": [], + "characters": { + USER_CHARACTER_KEY: "Alex", + MAIN_CHARACTER_KEY: "Jack", + "coach": "Maya", + }, + "agents": [], + } + ) + + character_data = Dict( + scope=Scope.settings, + default={ + "characters": [], + } + ) + + allow_reset = Boolean( + display_name=_("Allow reset"), + help=_("Allow the learner to reset the chat"), + scope=Scope.settings, + default=True, + ) + + blacklist = List( + display_name=_("Output blacklist"), + help=_( + "List of words that, if present in the AI response, " + "will cause the message to not be shown to the learner, " + "displaying an error instead" + ), + scope=Scope.settings, + # Prevent the LLM from breaking character and calling itself an AI + # assistant if the user tries to do subvert the plot. + default=["AI assistant"], + ) + + finished = Boolean( + scope=Scope.user_state, + default=False, + ) + + chat_history = List( + scope=Scope.user_state, + default=[], + ) + + editable_fields = AIEvalXBlock.editable_fields + ( + "scenario_data", + "character_data", + "supervisor_prompt", + "supervisor_prefill", + "role_characters", + "finish_command", + "agent_prompt", + "evaluator_prompt", + "allow_reset", + "blacklist", + ) + + def studio_view(self, context): + """ + Render a form for editing this XBlock + """ + fragment = super().studio_view(context) + fragment.add_javascript(self.resource_string("static/js/src/multiagent_edit.js")) + jsoneditor_html = self.resource_string("static/html/jsoneditor-iframe.html") + js_data = { + 'jsoneditor_html': jsoneditor_html, + } + # MultiAgentAIEvalXBlock() in multiagent_edit.js will call + # StudioEditableXBlockMixin(). 
+ fragment.initialize_js("MultiAgentAIEvalXBlock", js_data) + return fragment + + def _render_template(self, template, **context): + return self._jinja_env.from_string(template).render(context) + + def _get_character(self, key): + """For a given character key, get its agent and character data.""" + for agent, k in self.role_characters.items(): + if k == key: + name = self.scenario_data["characters"].get(key) + data = self._get_character_data(name) + return agent, data + return "", {} + + def _llm_input(self, prompt, user_input): + """Append the chat history to the given system prompt.""" + main_agent, main_data = self._get_character(self.MAIN_CHARACTER_KEY) + user_agent, user_data = self._get_character(self.USER_CHARACTER_KEY) + initial_messages = [] + if self.scenario_data["scenario"]["initial_message"]: + initial_messages.append({ + "role": "assistant", + "content": self.scenario_data["scenario"]["initial_message"], + "extra": { + "role": main_agent, + "character_data": main_data, + }, + }) + user_message = { + "role": "user", + "content": user_input, + } + chat_history = [] + # For legacy reasons, stored chat history has the format of a chat + # history with an LLM completion, with each message having a "role" + # of "user" or "assistant". + for message in itertools.chain(initial_messages, + self.chat_history, + [user_message]): + if message["role"] == "assistant": + agent = message["extra"].get("role") or "" + character_data = message["extra"].get("character_data") or {} + else: + agent = user_agent + character_data = user_data + chat_history.append({ + "content": message["content"], + "agent": agent, + "name": character_data.get("name", ""), + "role": character_data.get("role", ""), + }) + prompt += "\n\n" + self._render_template( + self.conversation_format, + messages=chat_history, + ) + yield {"role": "system", "content": prompt} + if self.model == SupportedModels.CLAUDE_SONNET.value: + # Claude needs a dummy user reply before the first + # assistant reply. 
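+ # (Some chat APIs require the first non-system message to come from
+ # the user, so a bare "." is sent as a placeholder user turn.)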
+ yield {"role": "user", "content": "."} + + def _get_field_display_name(self, field_name): + return self.fields[field_name].display_name + + def validate_field_data(self, validation, data): + """Validate field data.""" + super().validate_field_data(validation, data) + + try: + ScenarioData(**data.scenario_data) + except pydantic.ValidationError as e: + for error in e.errors(): + field = error["loc"][0] + msg = error["msg"] + validation.add(ValidationMessage( + ValidationMessage.ERROR, + ( + f"{self._get_field_display_name('scenario_data')}: " + f"{field!r}: {msg}" + ), + )) + try: + CharacterData(**data.character_data) + except pydantic.ValidationError as e: + for error in e.errors(): + field = error["loc"][0] + msg = error["msg"] + validation.add(ValidationMessage( + ValidationMessage.ERROR, + ( + f"{self._get_field_display_name('character_data')}: " + f"{field!r}: {msg}" + ), + )) + + for prompt_field in ['supervisor_prompt', 'evaluator_prompt']: + try: + self._render_template(getattr(data, prompt_field), + scenario_data=data.scenario_data) + except jinja2.TemplateError as e: + validation.add(ValidationMessage( + ValidationMessage.ERROR, + f"{self._get_field_display_name(prompt_field)}: {e}", + )) + + try: + self._render_template( + data.agent_prompt, + role="", + character_name="", + character_data=None, + user_character_name="", + user_character_data="", + scenario_data=data.scenario_data, + ) + except jinja2.TemplateError as e: + validation.add(ValidationMessage( + ValidationMessage.ERROR, + f"{self._get_field_display_name('agent_prompt')}: {e}", + )) + else: + chars = data.character_data.get("characters", []) + for i, char_data in enumerate(chars): + # Character name is validated above but may be missing yet. + char_name = char_data.get("name", "") + role = "" + for key, name in data.scenario_data.get("characters", + {}).items(): + if name == char_name: + for r, k in data.role_characters.items(): + if k == key: + role = r + break + break + try: + self._render_template( + data.agent_prompt, + role=role, + character_name=char_name, + character_data=char_data, + user_character_name="", + user_character_data="", + scenario_data=data.scenario_data, + ) + except jinja2.TemplateError as e: + validation.add(ValidationMessage( + ValidationMessage.ERROR, + ( + f"{self._get_field_display_name('agent_prompt')}/" + f"{self._get_field_display_name('character_data')}" + f"[{i}]: {e}" + ), + )) + + def student_view(self, context=None): + """ + The primary view of the MultiAgentAIEvalXBlock, shown to students + when viewing courses. + """ + + frag = Fragment() + scenario = self.scenario_data['scenario'] + frag.add_content( + self.loader.render_django_template( + "/templates/chatbox.html", + { + "self": self, + "has_finish_button": True, + "question_text": f"
{scenario['title']}
", + }, + ) + ) + frag.add_css(self.resource_string("static/css/chatbox.css")) + frag.add_javascript(self.resource_string("static/js/src/utils.js")) + frag.add_javascript(self.resource_string("static/js/src/chatbox.js")) + frag.add_javascript(self.resource_string("static/js/src/multiagent.js")) + marked_html = self.resource_string("static/html/marked-iframe.html") + main_agent = "" + main_name = "" + main_data = {} + for agent, key in self.role_characters.items(): + if key == self.MAIN_CHARACTER_KEY: + main_agent = agent + main_name = self._get_character_name(agent) + main_data = self._get_character_data(main_name) + break + js_data = { + "messages": self.chat_history, + "main_character_agent": main_agent, + "main_character_data": { + "name": main_data.get("name", main_name), + "role": main_data.get("role", ""), + }, + "initial_message": scenario["initial_message"], + "finished": self.finished, + "marked_html": marked_html, + } + frag.initialize_js("MultiAgentAIEvalXBlock", js_data) + return frag + + def _get_next_agent(self, user_input): + """Use the LLM to decide which agent should respond to the user.""" + prompt = self._render_template(self.supervisor_prompt, + scenario_data=self.scenario_data) + messages = list(self._llm_input(prompt, user_input)) + if self.model == SupportedModels.CLAUDE_SONNET.value: + if self.supervisor_prefill: + messages.append({"role": "assistant", + "content": self.supervisor_prefill}) + response = self.get_llm_response(messages).strip() + + choices = list(itertools.chain(self.role_characters.keys(), + [self.finish_command])) + m = re.search(fr"\b({'|'.join(map(re.escape, choices))})\b", + response, re.I) + if not m: + raise RuntimeError(f"bad response {response!r}") + found = m.group(1) + for choice in choices: + if choice.lower() == found.lower(): + return choice + + # Should not be reached. + raise RuntimeError("unknown error") + + def _get_character_name(self, agent): + """Get character name from agent name (supervisor choice).""" + if agent == self.finish_command: + return None + key = self.role_characters[agent] + return self.scenario_data["characters"][key] + + def _get_character_data(self, character_name): + """Get character data from character name.""" + for character_data in self.character_data["characters"]: + if character_data["name"] == character_name: + return character_data + return {} + + def _get_agent_response(self, agent, user_input): + """ + + Use the LLM to generate a message from the given agent in the scenario. 
+ + """ + user_name = self.scenario_data["characters"][self.USER_CHARACTER_KEY] + user_data = self._get_character_data(user_name) + character_name = self._get_character_name(agent) + character_data = self._get_character_data(character_name) + prompt = self._render_template( + self.agent_prompt, + scenario_data=self.scenario_data, + role=agent, + character_data=character_data, + character_name=character_name, + user_character_data=user_data, + user_character_name=user_name, + ) + messages = list(self._llm_input(prompt, user_input)) + response = self.get_llm_response(messages) + if self.blacklist: + if re.search(fr"\b({'|'.join(map(re.escape, self.blacklist))})\b", + response, re.I): + raise JsonHandlerError(500, "Internal error.") + if self.message_content_tag: + m = re.search((fr'<{re.escape(self.message_content_tag)}>(.*)' + fr''), + response) + if m: + response = m.group(1) + return response + + def _get_evaluator_response(self, user_input): + """Get the response from the special "Evaluator" agent.""" + prompt = self._render_template(self.evaluator_prompt, + scenario_data=self.scenario_data) + messages = list(self._llm_input(prompt, user_input)) + response = self.get_llm_response(messages) + return response + + @XBlock.json_handler + def get_response(self, data, suffix=""): # pylint: disable=unused-argument + """Generate the next message in the interaction.""" + # We use the LLM twice here: one time to decide which character to use, + # and one time to act as that character. + + if self.finished: + raise JsonHandlerError(403, "The session has ended.") + + if data.get("force_finish", False): + user_input = "" + agent = None + is_evaluator = True + else: + user_input = str(data["user_input"]) + agent = self._get_next_agent(user_input) + is_evaluator = (agent == self.finish_command) + + if is_evaluator: + message = self._get_evaluator_response(user_input) + self.finished = True + character_data = {} + else: + message = self._get_agent_response(agent, user_input) + character_name = self._get_character_name(agent) + character_data = self._get_character_data(character_name) + character_data = character_data.copy() + character_data.setdefault("name", character_name) + + self.chat_history.append({"role": "user", "content": user_input}) + extra = {"is_evaluator": is_evaluator, "role": agent, + "character_data": character_data} + self.chat_history.append({"role": "assistant", "content": message, + "extra": extra}) + return { + "message": message, + "finished": self.finished, + "is_evaluator": is_evaluator, + "role": agent, + "character_data": { + "name": character_data.get("name", ""), + "role": character_data.get("role", ""), + }, + } + + @XBlock.json_handler + def reset(self, data, suffix=""): + """Reset the chat history.""" + if not self.allow_reset: + raise JsonHandlerError(403, "Reset is disabled.") + self.chat_history = [] + self.finished = False + return {} diff --git a/ai_eval/shortanswer.py b/ai_eval/shortanswer.py index cd9b553..460311e 100644 --- a/ai_eval/shortanswer.py +++ b/ai_eval/shortanswer.py @@ -11,7 +11,6 @@ from xblock.fields import Boolean, Integer, String, Scope from xblock.validation import ValidationMessage -from .llm import get_llm_response from .base import AIEvalXBlock @@ -23,6 +22,9 @@ class ShortAnswerAIEvalXBlock(AIEvalXBlock): Short Answer Xblock. 
""" + USER_KEY = "USER" + LLM_KEY = "LLM" + display_name = String( display_name=_("Display Name"), help=_("Name of the component in the studio"), @@ -30,6 +32,29 @@ class ShortAnswerAIEvalXBlock(AIEvalXBlock): scope=Scope.settings, ) + evaluation_prompt = String( + display_name=_("Evaluation prompt"), + help=_( + "Enter the evaluation prompt given to the model." + " The question will be inserted right after it." + " The student's answer would then follow the question. Markdown format can be used." + ), + default="You are a teacher. Evaluate the student's answer for the following question:", + multiline_editor=True, + scope=Scope.settings, + ) + + question = String( + display_name=_("Question"), + help=_( + "Enter the question you would like the students to answer." + " Markdown format can be used." + ), + default="", + multiline_editor=True, + scope=Scope.settings, + ) + character_image = String( display_name=_("Character Image URL"), help=_( @@ -53,7 +78,15 @@ class ShortAnswerAIEvalXBlock(AIEvalXBlock): default=False, ) + messages = Dict( + help=_("Dictionary with chat messages"), + scope=Scope.user_state, + default={USER_KEY: [], LLM_KEY: []}, + ) + editable_fields = AIEvalXBlock.editable_fields + ( + "question", + "evaluation_prompt", "max_responses", "allow_reset", "character_image", @@ -66,6 +99,13 @@ def validate_field_data(self, validation, data): super().validate_field_data(validation, data) + if not data.question: + validation.add( + ValidationMessage( + ValidationMessage.ERROR, _("Question field is mandatory") + ) + ) + if not data.max_responses or data.max_responses <= 0 or data.max_responses > 9: validation.add( ValidationMessage( @@ -83,15 +123,18 @@ def student_view(self, context=None): frag = Fragment() frag.add_content( self.loader.render_django_template( - "/templates/shortanswer.html", + "/templates/chatbox.html", { "self": self, + "has_finish_button": False, + "question_text": _("Loading..."), }, ) ) - frag.add_css(self.resource_string("static/css/shortanswer.css")) + frag.add_css(self.resource_string("static/css/chatbox.css")) frag.add_javascript(self.resource_string("static/js/src/utils.js")) + frag.add_javascript(self.resource_string("static/js/src/chatbox.js")) frag.add_javascript(self.resource_string("static/js/src/shortanswer.js")) marked_html = self.resource_string("static/html/marked-iframe.html") @@ -123,21 +166,14 @@ def get_response(self, data, suffix=""): # pylint: disable=unused-argument # add previous messages # the first AI role is 'system' which defines the LLM's personnality and behavior. 
# subsequent roles are 'assistant' and 'user' - for i in range(len(self.messages[self.USER_KEY])): - messages.append( - {"content": self.messages[self.USER_KEY][i], "role": "user"} - ) - messages.append( - {"content": self.messages[self.LLM_KEY][i], "role": "assistant"} - ) - + for user_msg, assistant_msg in zip(self.messages[self.USER_KEY], + self.messages[self.LLM_KEY]): + messages.append({"content": user_msg or ".", "role": "user"}) + messages.append({"content": assistant_msg, "role": "assistant"}) messages.append({"role": "user", "content": user_submission}) try: - response = get_llm_response( - self.model, self.model_api_key, messages, self.model_api_url - ) - + response = self.get_llm_response(messages) except Exception as e: traceback.print_exc() logger.error( diff --git a/ai_eval/static/css/shortanswer.css b/ai_eval/static/css/chatbox.css similarity index 85% rename from ai_eval/static/css/shortanswer.css rename to ai_eval/static/css/chatbox.css index 75ca04d..433400c 100644 --- a/ai_eval/static/css/shortanswer.css +++ b/ai_eval/static/css/chatbox.css @@ -1,5 +1,3 @@ -/* CSS for ShortAnswerAIEvalXBlock */ - .shortanswer_image { float: left; margin-right: 4rem; @@ -18,7 +16,7 @@ cursor: pointer; } -.chat-history { +#chat-history { min-height: 250px; max-height: 400px; overflow-y: auto; @@ -29,6 +27,7 @@ .chat-message-container { display: flex; } + .chat-message-container .chat-message { flex: 0 1 auto; max-width: 80%; @@ -45,12 +44,13 @@ } .ai-eval, -.message-spinner { +#message-spinner { text-align: left; color: white; margin-right: auto; background-color: #476480; } + .ai-eval * { color: white !important; } @@ -62,7 +62,7 @@ padding: 10px; } -.submit-row .user-input { +.submit-row textarea { flex: 8; height: auto; border: 1px solid gray; @@ -72,15 +72,16 @@ resize: none; } -#submit-button, #reset-button { +.submit-row .chat-button { flex: 1; height: fit-content; text-align: center; - padding: 10px 0; + padding: 10px 5px; margin-left: auto; border-radius: 5px; border: none; cursor: pointer; + white-space: nowrap; } #submit-button { @@ -89,18 +90,20 @@ margin-left: 10px; } -#reset-button { +#reset-button, +#finish-button { border: 1px solid; color: #00262b; margin-right: 10px; } -.submit-row .disabled-btn { +.submit-row .disabled { opacity: 0.8; cursor: not-allowed !important; } + /* spinner animation */ -.chat-message-container .message-spinner > div { +.chat-message-container #message-spinner > div { width: 4px; height: 4px; margin-right: 2px; @@ -112,12 +115,12 @@ animation: chat-block-sk-bouncedelay 1.4s infinite ease-in-out both; } -.chat-message-container.message-spinner .bounce1 { +.chat-message-container #message-spinner .bounce1 { -webkit-animation-delay: -0.32s; animation-delay: -0.32s; } -.chat-message-container .message-spinner .bounce2 { +.chat-message-container #message-spinner .bounce2 { -webkit-animation-delay: -0.16s; animation-delay: -0.16s; } diff --git a/ai_eval/static/html/jsoneditor-iframe.html b/ai_eval/static/html/jsoneditor-iframe.html new file mode 100644 index 0000000..55c3cb2 --- /dev/null +++ b/ai_eval/static/html/jsoneditor-iframe.html @@ -0,0 +1,11 @@ + + + + + + + + diff --git a/ai_eval/static/js/src/chatbox.js b/ai_eval/static/js/src/chatbox.js new file mode 100644 index 0000000..43512b0 --- /dev/null +++ b/ai_eval/static/js/src/chatbox.js @@ -0,0 +1,176 @@ +function ChatBox(runtime, element, data, handleInit, handleResponse, + handleReset) { + "use strict"; + + loadMarkedInIframe(data.marked_html); + + const handlerUrl = runtime.handlerUrl(element, 
"get_response"); + const resetHandlerUrl = runtime.handlerUrl(element, "reset"); + + const $chatContainer = $("#chat-history", element); + const $spinner = $("#message-spinner", element); + const $spinnerContainer = $("#chat-spinner-container", element); + const $resetButton = $("#reset-button", element); + const $finishButton = $("#finish-button", element); + const $submitButton = $("#submit-button", element); + const $userInput = $("#user-input", element); + + const enableControl = function($control, enable) { + $control.prop("disabled", !enable); + $control[enable ? "removeClass" : "addClass"]("disabled"); + }; + + $userInput.on("input", function(event) { + const $input = $(this); + $input.height(0); + $input.height($input.prop("scrollHeight")); + }); + + const scrollToBottom = function() { + $chatContainer.scrollTop($chatContainer.prop("scrollHeight")); + }; + + const insertMessage = function(class_, content) { + const $message = $('
<div class="chat-message"></div>');
+ $message.addClass(class_);
+ $message.append(content);
+ const $messageContainer = $('<div class="chat-message-container"></div>
'); + $messageContainer.append($message); + $messageContainer.insertBefore($spinnerContainer); + scrollToBottom(); + }; + + const deleteLastMessage = function() { + $spinnerContainer.prev().remove(); + }; + + const fns = { + enableReset: function(enable) { + const enabled = !$resetButton.prop("disabled"); + enableControl($resetButton, enable); + return enabled; + }, + + enableInput: function(enable) { + const enabled = !$userInput.prop("disabled"); + enableControl($userInput, enable); + enableControl($submitButton, enable); + enableControl($finishButton, enable); + return enabled; + }, + + insertUserMessage: function(content) { + if (content) { + insertMessage("user-answer", $(MarkdownToHTML(content))); + } + }, + + insertAIMessage: function(content) { + insertMessage("ai-eval", content); + }, + }; + + const getResponse = function(inputData) { + const inputEnabled = fns.enableInput(false); + const resetEnabled = fns.enableReset(false); + if (inputData.user_input) { + fns.insertUserMessage(inputData.user_input); + $userInput.val(""); + $userInput.trigger("input"); + } + $spinner.show(); + scrollToBottom(); + $.ajax({ + url: handlerUrl, + method: "POST", + data: JSON.stringify(inputData), + success: function(response) { + $spinner.hide(); + fns.enableReset(true); + handleResponse.call(fns, response); + }, + error: function() { + $spinner.hide(); + fns.enableReset(resetEnabled); + fns.enableInput(inputEnabled); + if (inputData.user_input) { + deleteLastMessage(); + $userInput.val(inputData.user_input); + $userInput.trigger("input"); + } + alert(gettext("An error has occurred.")); + }, + }); + }; + + const handleUserInput = function($input) { + if ($input.prop("disabled")) { + return; + } + if (!$input.val()) { + return; + } + getResponse({ user_input: $input.val() }); + }; + + $userInput.keypress(function(event) { + if (event.keyCode == 13 && !event.shiftKey) { + event.preventDefault(); + handleUserInput($(this)); + return false; + } + }); + + $submitButton.click(function() { + if ($(this).prop("disabled")) { + return; + } + handleUserInput($userInput); + }); + + $finishButton.click(function() { + if ($(this).prop("disabled")) { + return; + } + getResponse({ force_finish: true }); + }); + + $resetButton.click(function() { + if ($(this).prop("disabled")) { + return; + } + const inputEnabled = fns.enableInput(false); + const resetEnabled = fns.enableReset(false); + $spinner.show(); + scrollToBottom(); + $.ajax({ + url: resetHandlerUrl, + method: "POST", + data: JSON.stringify({}), + success: function() { + $spinner.hide(); + $spinnerContainer.prevAll('.chat-message-container').remove(); + fns.enableInput(true); + handleReset.call(fns); + }, + error: function() { + $spinner.hide(); + fns.enableReset(resetEnabled); + fns.enableInput(inputEnabled); + alert(gettext("An error has occurred.")); + }, + }); + }); + + var initDone = false; + + const init = function() { + if (initDone) { + return; + } + initDone = true; + handleInit.call(fns); + }; + + runFuncAfterLoading(init); +} diff --git a/ai_eval/static/js/src/multiagent.js b/ai_eval/static/js/src/multiagent.js new file mode 100644 index 0000000..5811e1a --- /dev/null +++ b/ai_eval/static/js/src/multiagent.js @@ -0,0 +1,81 @@ +/* Javascript for MultiAgentAIEvalXBlock. 
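+   Renders the learner-facing chat through the shared ChatBox() helper in
+   chatbox.js, and prefixes each AI message with the speaking character's
+   name and role ("Evaluator" for the final evaluation report).
+   js_data is assembled in student_view() and carries: messages,
+   main_character_agent, main_character_data {name, role}, initial_message,
+   finished, and marked_html.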
*/
+function MultiAgentAIEvalXBlock(runtime, element, data) {
+    "use strict";
+
+    const formatAIMessage = function(content, is_evaluator, agent,
+                                     character_data) {
+        var name;
+        if (is_evaluator) {
+            name = gettext("Evaluator");
+        } else {
+            if (character_data && character_data.name) {
+                name = character_data.name;
+            }
+            var role_text;
+            if (agent && agent !== data.main_character_agent) {
+                role_text = `${agent}`;
+            }
+            if (character_data && character_data.role) {
+                if (role_text) {
+                    role_text = `${role_text}, ${character_data.role}`;
+                } else {
+                    role_text = character_data.role;
+                }
+            }
+            if (role_text) {
+                if (name) {
+                    name = `${name} (${role_text})`;
+                } else {
+                    name = role_text;
+                }
+            }
+        }
+        if (name) {
+            name = `${name}:`;
+        } else {
+            name = "";
+        }
+
+        return $(`<div>
+            <b>${name}</b>
+            ${MarkdownToHTML(content)}
+        </div>`);
+    };
+
+    const formatInitialMessage = function() {
+        return formatAIMessage(data.initial_message, false,
+                               data.main_character_agent,
+                               data.main_character_data);
+    };
+
+    const handleInit = function() {
+        this.insertAIMessage(formatInitialMessage());
+        for (var i = 0; i < data.messages.length; i++) {
+            var message = data.messages[i];
+            if (message.role === "user") {
+                this.insertUserMessage(message.content);
+            } else {
+                this.insertAIMessage(formatAIMessage(message.content,
+                                                     message.extra.is_evaluator,
+                                                     message.extra.role,
+                                                     message.extra.character_data));
+            }
+        }
+        this.enableReset(data.messages.length > 0 || data.finished);
+        this.enableInput(!data.finished);
+    };
+
+    const handleResponse = function(response) {
+        this.insertAIMessage(formatAIMessage(response.message,
+                                             response.is_evaluator,
+                                             response.role,
+                                             response.character_data));
+        this.enableInput(!response.finished);
+    };
+
+    const handleReset = function() {
+        this.insertAIMessage(formatInitialMessage());
+    };
+
+    ChatBox(runtime, element, data, handleInit, handleResponse, handleReset);
+}
diff --git a/ai_eval/static/js/src/multiagent_edit.js b/ai_eval/static/js/src/multiagent_edit.js
new file mode 100644
index 0000000..c301068
--- /dev/null
+++ b/ai_eval/static/js/src/multiagent_edit.js
@@ -0,0 +1,100 @@
+/* Studio editing JavaScript for MultiAgentAIEvalXBlock. */
+function MultiAgentAIEvalXBlock(runtime, element, data) {
+    "use strict";
+
+    StudioEditableXBlockMixin(runtime, element);
+
+    var $fields = $('#xb-field-edit-scenario_data, #xb-field-edit-character_data');
+
+    var addFileInput = function() {
+        var $wrapper = $('
<div></div>');
+        $wrapper.css('margin-left', 'calc(25% + 15px)');
+        $wrapper.css('margin-top', '5px');
+        var $fileInput = $('<input type="file">');
+        $fileInput.css('width', 'calc(45% - 10px)');
+        $wrapper.append($fileInput);
+        var $loadButton = $('