is detected, it means the entity page is not found on wikipedia\n",
- " result_divs = soup.find_all(\"div\", {\"class\": \"mw-search-result-heading\"})\n",
- "\n",
- " if (\n",
- " result_divs\n",
- " ): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n",
- " # get Similar results\n",
- " similar_titles = [div.a.get_text() for div in result_divs]\n",
- " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n",
- " else:\n",
- " # the paper uses page to represent content in
\n",
- " # Extract content\n",
- " page_list = [\n",
- " p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")\n",
- " ]\n",
- " # TODO: Recursive search, if find any concept that needs more search then call search again\n",
- " # if any(\"may refer to:\" in p for p in page_list):\n",
- " # search(entity)\n",
- "\n",
- " # restructure & clean the page content following the paper's logic\n",
- " page = \"\"\n",
- " for p in page_list:\n",
- " if len(p.split(\" \")) > 2:\n",
- " page += clean_str(p)\n",
- " if not p.endswith(\"\\n\"):\n",
- " page += \"\\n\"\n",
- " paragraphs = page.split(\"\\n\")\n",
- " paragraphs = [p.strip() for p in paragraphs if p.strip()]\n",
- "\n",
- " sentences = []\n",
- " for p in paragraphs:\n",
- " sentences += p.split(\". \")\n",
- " sentences = [s.strip() + \".\" for s in sentences if s.strip()]\n",
- "\n",
- " # return the first 5 sentences\n",
- " if sentences:\n",
- " return (\n",
- " \" \".join(sentences[:5]) if len(sentences) >= 5 else \" \".join(sentences)\n",
- " )\n",
- " else:\n",
- " return \"No content found on this page.\"\n",
- "\n",
- " # TODO: clean the paragraphs and return the searched content\n",
- "\n",
- "\n",
- "def lookup(text: str, keyword: str) -> str:\n",
- " \"\"\"\n",
- " returns the sentences containing keyword in the current passage.\n",
- " \"\"\"\n",
- " sentences = text.split(\".\")\n",
- " matching_sentences = [\n",
- " sentence.strip() + \".\"\n",
- " for sentence in sentences\n",
- " if keyword.lower() in sentence.lower()\n",
- " ]\n",
- " if not matching_sentences:\n",
- " return \"No sentences found with the keyword.\"\n",
- " else:\n",
- " return \" \".join(\n",
- " matching_sentences\n",
- " ) # Join all matching sentences into a single string"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "# set up tools for the agent\n",
- "tools = [FunctionTool.from_defaults(fn=search), FunctionTool.from_defaults(fn=lookup)]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Examples\n",
- "The next thing to add is examples. Few-shot prompting is a common practice for improving model performance.\n",
- "\n",
- "Let's use the paper's examples. The paper has 6 examples altogether."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "examples = [\n",
- " \"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n",
- "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\n",
- "Action 1: search(\"Colorado orogeny\")\n",
- "Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\n",
- "Thought 2: It does not mention the eastern sector. So I need to look up eastern sector.\n",
- "Action 2: lookup(\"eastern sector\")\n",
- "Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\n",
- "Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\n",
- "Action 3: search(\"High Plains\")\n",
- "Observation 3: High Plains refers to one of two distinct land regions:\n",
- "Thought 4: I need to instead search High Plains (United States).\n",
- "Action 4: search(\"High Plains (United States)\")\n",
- "Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\n",
- "Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\n",
- "Action 5: finish(\"1,800 to 7,000 ft\")\"\"\",\n",
- " \"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n",
- "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.\n",
- "Action 1: search(\"Milhouse\")\n",
- "Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\n",
- "Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\n",
- "Action 2: lookup(\"named after\")\n",
- "Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \n",
- "Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\n",
- "Action 3: finish(\"Richard Nixon\")\"\"\",\n",
- " \"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n",
- "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\n",
- "Action 1: search(\"Adam Clayton Powell\")\n",
- "Observation 1: Could not find [\"Adam Clayton Powell\"]. Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\n",
- "Thought 2: To find the documentary, I can search Adam Clayton Powell (film).\n",
- "Action 2: search(\"Adam Clayton Powell (film)\")\n",
- "Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\n",
- "The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\n",
- "Thought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\n",
- "Action 3: finish(\"The Saimaa Gesture\")\"\"\",\n",
- " \"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n",
- "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\n",
- "Action 1: search(\"Nicholas Ray\")\n",
- "Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\n",
- "Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\n",
- "Action 2: search(\"Elia Kazan\")\n",
- "Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\n",
- "Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\n",
- "Action 3: finish(\"director, screenwriter, actor\")\"\"\",\n",
- " \"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n",
- "Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\n",
- "Action 1: search(\"Arthur's Magazine\")\n",
- "Observation 1: Arthur's Magazine (1844-1846) was an American literary periodical published in Philadelphia in the 19th century. \n",
- "Thought 2: Arthur's Magazine was started in 1844. I need to search First for Women next.\n",
- "Action 2: search(\"First for Women\")\n",
- "Observation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \n",
- "Thought 3: First for Women was started in 1989. 1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\n",
- "Action 3: finish(\"Arthur's Magazine\")\"\"\",\n",
- " \"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n",
- "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\n",
- "Action 1: search(\"Pavel Urysohn\")\n",
- "Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 â August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\n",
- "Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\n",
- "Action 2: search(\"Leonid Levin\")\n",
- "Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \n",
- "Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \n",
- "Action 3: finish(\"yes\")\"\"\",\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "# preset the examples as prompt_kwargs; the examples will be included in the system prompt\n",
- "\n",
- "preset_prompt_kwargs = {\"examples\": examples}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Model\n",
- "\n",
- "Next, we can choose the model to call. In this example we will use the ``gpt-3.5-turbo`` model through ``OpenAIClient``. We will set the ``temperature`` to 0.0 to make the responses as consistent as possible."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "gpt_model_kwargs = {\n",
- " \"model\": \"gpt-3.5-turbo\",\n",
- " \"temperature\": 0.0,\n",
- "}"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Agent\n",
- "Combining the previous components, we can define the agent."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "ReActAgent(\n",
- " tools=[FunctionTool(search), FunctionTool(lookup), FunctionTool(llm_tool), FunctionTool(finish)], max_steps=3, model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.0}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {# role/task description #}\n",
- " You task is to answer user's query with minimum steps and maximum accuracy using the tools provided.\n",
- " {# REACT instructions #}\n",
- " Each step you will read the previous Thought, Action, and Observation(execution result of the action)steps and then provide the next Thought and Action.\n",
- " \n",
- " You only have access to the following tools:\n",
- " {# tools #}\n",
- " {% for tool in tools %}\n",
- " {{ loop.index }}. ToolName: {{ tool.metadata.name }}\n",
- " Tool Description: {{ tool.metadata.description }}\n",
- " Tool Parameters: {{ tool.metadata.fn_schema_str }} {#tool args can be misleading, especially if we already have type hints and docstring in the function#}\n",
- " {% endfor %}\n",
- " {# output is always more robust to use json than string #}\n",
- " ---\n",
- " Your output must be in valid JSON format(raw Python string format) with two keys:\n",
- " {\n",
- " \"thought\": \"\",\n",
- " \"action\": \"ToolName(, )\"\n",
- " }\n",
- " - Must double quote the JSON str.\n",
- " - Inside of the JSON str, Must use escape double quote and escape backslash for string.\n",
- " For example:\n",
- " \"action\": \"finish(\\\"John's.\\\")\"\n",
- " ---\n",
- " {# Specifications TODO: preference between the usage of llm tool vs the other tool #}\n",
- " Process:\n",
- " - Step 1: Read the user query and potentially divide it into subqueries. And get started with the first subquery.\n",
- " - Call one available tool at a time to solve each subquery/subquestion. \\\n",
- " - At step 'finish', join all subqueries answers and finish the task.\n",
- " Remember:\n",
- " - Action must call one of the above tools with Took Name. It can not be empty.\n",
- " - Read the Tool Description and ensure your args and kwarg follow what each tool expects in types. e.g. (a=1, b=2) if it is keyword argument or (1, 2) if it is positional.\n",
- " - You will always end with 'finish' action to finish the task. The answer can be the final answer or failure message.\n",
- " - When the initial query is simple, use minimum steps to answer the query.\n",
- " {#Examples can be here#}\n",
- " {# Check if there are any examples #}\n",
- " {% if examples %}\n",
- " \n",
- " {% for example in examples %}\n",
- " {{ example }}\n",
- " {% endfor %}\n",
- " \n",
- " {% endif %}\n",
- " <>\n",
- " -----------------\n",
- " {# History #}\n",
- " {% for history in step_history %}\n",
- " Step {{history.step}}:\n",
- " {\n",
- " \"thought\": \"{{history.thought}}\",\n",
- " \"action\": \"{{history.action}}\",\n",
- " }\n",
- " \"observation\": \"{{history.observation}}\"\n",
- " {% endfor %}\n",
- " {% if input_str %}\n",
- " User query:\n",
- " {{ input_str }}\n",
- " {% endif %}\n",
- " , preset_prompt_kwargs: {'examples': ['Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nThought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\\nAction 1: search(\"Colorado orogeny\")\\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\\nThought 2: It does not mention the eastern sector. So I need to look up eastern sector.\\nAction 2: lookup(\"eastern sector\")\\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\\nAction 3: search(\"High Plains\")\\nObservation 3: High Plains refers to one of two distinct land regions:\\nThought 4: I need to instead search High Plains (United States).\\nAction 4: search(\"High Plains (United States)\")\\nObservation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\\nAction 5: finish(\"1,800 to 7,000 ft\")', 'Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\\nThought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. 
I only need to search Milhouse and find who it is named after.\\nAction 1: search(\"Milhouse\")\\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\\nThought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\\nAction 2: lookup(\"named after\")\\nObservation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \\nThought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\\nAction 3: finish(\"Richard Nixon\")', 'Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nThought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\\nAction 1: search(\"Adam Clayton Powell\")\\nObservation 1: Could not find [\"Adam Clayton Powell\"]. Similar: [\\'Adam Clayton Powell III\\', \\'Seventh Avenue (Manhattan)\\', \\'Adam Clayton Powell Jr. State Office Building\\', \\'Isabel Washington Powell\\', \\'Adam Powell\\', \\'Adam Clayton Powell (film)\\', \\'Giancarlo Esposito\\'].\\nThought 2: To find the documentary, I can search Adam Clayton Powell (film).\\nAction 2: search(\"Adam Clayton Powell (film)\")\\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\\nThe film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\\nThought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. 
So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\\nAction 3: finish(\"The Saimaa Gesture\")', 'Question: What profession does Nicholas Ray and Elia Kazan have in common?\\nThought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\\nAction 1: search(\"Nicholas Ray\")\\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\\nThought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\\nAction 2: search(\"Elia Kazan\")\\nObservation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\\nThought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\\nAction 3: finish(\"director, screenwriter, actor\")', 'Question: Which magazine was started first Arthur\\'s Magazine or First for Women?\\nThought 1: I need to search Arthur\\'s Magazine and First for Women, and find which was started first.\\nAction 1: search(\"Arthur\\'s Magazine\")\\nObservation 1: Arthur\\'s Magazine (1844-\\x80\\x931846) was an American literary periodical published in Philadelphia in the 19th century. \\nThought 2: Arthur\\'s Magazine was started in 1844. I need to search First for Women next.\\nAction 2: search(\"First for Women\")\\nObservation 2: First for Women is a woman\\'s magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \\nThought 3: First for Women was started in 1989. 
1844 (Arthur\\'s Magazine) < 1989 (First for Women), so Arthur\\'s Magazine was started first.\\nAction 3: finish(\"Arthur\\'s Magazine\")', 'Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nThought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\\nAction 1: search(\"Pavel Urysohn\")\\nObservation 1: Pavel Samuilovich Urysohn (February 3, 1898 â\\x80\\x93 August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\\nThought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\\nAction 2: search(\"Leonid Levin\")\\nObservation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \\nThought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \\nAction 3: finish(\"yes\")'], 'tools': [FunctionTool(search), FunctionTool(lookup), FunctionTool(llm_tool), FunctionTool(finish)]}, prompt_variables: ['examples', 'step_history', 'input_str', 'tools']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): JsonParser()\n",
- " (additional_llm_tool): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.0}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {% if task_desc_str or output_format_str or tools_str or examples_str or chat_history_str or context_str or steps_str %}\n",
- " \n",
- " {% endif %}\n",
- " {# task desc #}\n",
- " {% if task_desc_str %}\n",
- " {{task_desc_str}}\n",
- " {% endif %}\n",
- " {# output format #}\n",
- " {% if output_format_str %}\n",
- " \n",
- " {{output_format_str}}\n",
- " \n",
- " {% endif %}\n",
- " {# tools #}\n",
- " {% if tools_str %}\n",
- " \n",
- " {{tools_str}}\n",
- " \n",
- " {% endif %}\n",
- " {# example #}\n",
- " {% if examples_str %}\n",
- " \n",
- " {{examples_str}}\n",
- " \n",
- " {% endif %}\n",
- " {# chat history #}\n",
- " {% if chat_history_str %}\n",
- " \n",
- " {{chat_history_str}}\n",
- " \n",
- " {% endif %}\n",
- " {#contex#}\n",
- " {% if context_str %}\n",
- " \n",
- " {{context_str}}\n",
- " \n",
- " {% endif %}\n",
- " {# steps #}\n",
- " {% if steps_str %}\n",
- " \n",
- " {{steps_str}}\n",
- " \n",
- " {% endif %}\n",
- " {% if task_desc_str or output_format_str or tools_str or examples_str or chat_history_str or context_str or steps_str %}\n",
- " \n",
- " {% endif %}\n",
- " {% if input_str %}\n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " {% endif %}\n",
- " {% if output_str %}\n",
- " \n",
- " {{output_str}}\n",
- " \n",
- " {% endif %}\n",
- " You:\n",
- " , prompt_variables: ['context_str', 'input_str', 'steps_str', 'output_format_str', 'output_str', 'chat_history_str', 'tools_str', 'examples_str', 'task_desc_str']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " )\n",
- ")"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# max_steps refers to how many thought-action rounds we allow the model to perform\n",
- "# to save resources, let's use 3 here\n",
- "agent = ReActAgent(\n",
- " tools=tools,\n",
- " max_steps=3,\n",
- " model_client=OpenAIClient(),\n",
- " model_kwargs=gpt_model_kwargs,\n",
- " preset_prompt_kwargs=preset_prompt_kwargs,\n",
- ")\n",
- "agent"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import importlib\n",
- "import adalflow\n",
- "\n",
- "importlib.reload(adalflow)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 5. Q & A\n",
- "Next we can use the agent to answer our questions. Let's run 5 examples. We will use the validation data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Dataset({\n",
- " features: ['id', 'question', 'answer', 'type', 'level', 'supporting_facts', 'context'],\n",
- " num_rows: 7405\n",
- "})"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "val_dataset = dataset[\"validation\"]\n",
- "val_dataset"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "``LightRAG`` provides a ``printc`` function. You can use it to show colored console output for the agent."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[36m2024-06-15 23:13:47 - [react_agent.py:330:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:13:48 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.\n",
- "Action 1: search(\"Scott Derrickson\")\n",
- "Observation 1: Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:13:50 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.\n",
- "Action 2: search(\"Ed Wood\")\n",
- "Observation 2: Edward Davis Wood Jr. (October 10, 1924 – December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:13:51 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: Scott Derrickson is an American filmmaker and Ed Wood is also an American filmmaker. They are of the same nationality.\n",
- "Action 3: finish(\"yes\")\n",
- "Observation 3: yes\u001b[0m\n",
- "\u001b[36m2024-06-15 23:13:51 - [react_agent.py:345:call] - answer: yes\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.', action='search(\"Scott Derrickson\")', fun_name='search', fun_args=['Scott Derrickson'], fun_kwargs={}, observation='Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.'), StepOutput(step=2, thought='Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.', action='search(\"Ed Wood\")', fun_name='search', fun_args=['Ed Wood'], fun_kwargs={}, observation=\"Edward Davis Wood Jr. (October 10, 1924\\xa0– December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. 
Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\"), StepOutput(step=3, thought='Scott Derrickson is an American filmmaker and Ed Wood is also an American filmmaker. They are of the same nationality.', action='finish(\"yes\")', fun_name='finish', fun_args=['yes'], fun_kwargs={}, observation='yes')]\n",
- "\u001b[33m2024-06-15 23:13:51 - [2706144185.py:12:] - question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: yes\u001b[0m\n",
- "\u001b[36m2024-06-15 23:13:51 - [react_agent.py:330:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:13:53 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.\n",
- "Action 1: search(\"Corliss Archer Kiss and Tell film\")\n",
- "Observation 1: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:13:55 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n",
- "Action 2: search(\"Corliss Archer Kiss and Tell film\")\n",
- "Observation 2: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\",\n",
- " \"action\": \"search(\\\"Corliss Archer Kiss and Tell film\\\")\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:13:57 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n",
- "Action 3: search(\"Corliss Archer Kiss and Tell film\")\n",
- "Observation 3: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "\u001b[36m2024-06-15 23:13:57 - [react_agent.py:345:call] - answer: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=2, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=3, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\")]\n",
- "\u001b[33m2024-06-15 23:13:57 - [2706144185.py:12:] - question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: could not find exact page for corliss archer kiss and tell film similar topics kiss for corliss corliss archer kiss and tell 1945 film kiss and tell play meet corliss archer\u001b[0m\n",
- "\u001b[36m2024-06-15 23:13:57 - [react_agent.py:330:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:13:59 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for a science fantasy young adult series told in first person, with companion books narrating the stories of enslaved worlds and alien species.\n",
- "Action 1: search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")\n",
- "Observation 1: Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:00 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The search results suggest 'Animorphs' as a similar topic, I should search for 'Animorphs' to see if it matches the criteria.\n",
- "Action 2: search('Animorphs')\n",
- "Observation 2: Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:01 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The series that matches the criteria is 'Animorphs'. I should provide this as the answer.\n",
- "Action 3: finish(\"Animorphs\")\n",
- "Observation 3: Animorphs\u001b[0m\n",
- "\u001b[36m2024-06-15 23:14:01 - [react_agent.py:345:call] - answer: Animorphs\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for a science fantasy young adult series told in first person, with companion books narrating the stories of enslaved worlds and alien species.', action='search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")', fun_name='search', fun_args=['science fantasy young adult series first person companion books enslaved worlds alien species'], fun_kwargs={}, observation=\"Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\"), StepOutput(step=2, thought=\"The search results suggest 'Animorphs' as a similar topic, I should search for 'Animorphs' to see if it matches the criteria.\", action=\"search('Animorphs')\", fun_name='search', fun_args=['Animorphs'], fun_kwargs={}, observation=\"Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\"), StepOutput(step=3, thought=\"The series that matches the criteria is 'Animorphs'. 
I should provide this as the answer.\", action='finish(\"Animorphs\")', fun_name='finish', fun_args=['Animorphs'], fun_kwargs={}, observation='Animorphs')]\n",
- "\u001b[33m2024-06-15 23:14:01 - [2706144185.py:12:] - question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: animorphs\u001b[0m\n",
- "\u001b[36m2024-06-15 23:14:01 - [react_agent.py:330:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:03 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.\n",
- "Action 1: search(\"Laleli Mosque\")\n",
- "Observation 1: The Laleli Mosque (Turkish: Laleli Camii, lit. 'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. 
The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:04 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.\n",
- "Action 2: search(\"Esma Sultan Mansion\")\n",
- "Observation 2: The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:05 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.\n",
- "Action 3: finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")\n",
- "Observation 3: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:14:05 - [react_agent.py:345:call] - answer: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.', action='search(\"Laleli Mosque\")', fun_name='search', fun_args=['Laleli Mosque'], fun_kwargs={}, observation=\"The Laleli Mosque (Turkish: Laleli Camii, lit.\\u2009'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. 
The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\"), StepOutput(step=2, thought='I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.', action='search(\"Esma Sultan Mansion\")', fun_name='search', fun_args=['Esma Sultan Mansion'], fun_kwargs={}, observation='The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. 
It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].'), StepOutput(step=3, thought='I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.', action='finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")', fun_name='finish', fun_args=['No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.'], fun_kwargs={}, observation='No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.')]\n",
- "\u001b[33m2024-06-15 23:14:05 - [2706144185.py:12:] - question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque is located in laleli fatih istanbul turkey while esma sultan mansion is in ortaköy neighborhood of istanbul turkey\u001b[0m\n",
- "\u001b[36m2024-06-15 23:14:06 - [react_agent.py:330:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:07 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\n",
- "Action 1: search('Big Stone Gap director')\n",
- "Observation 1: Could not find exact page for 'Big Stone Gap director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Union High School (Big Stone Gap, Virginia)', 'Star Wood', 'Dagmara Domińczyk']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:14:09 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\n",
- "Action 2: search('Big Stone Gap film location')\n",
- "Observation 2: Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\",\n",
- " \"action\": \"search('Big Stone Gap film location')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:14:11 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\n",
- "Action 3: search('Big Stone Gap film location')\n",
- "Observation 3: Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\u001b[0m\n",
- "\u001b[36m2024-06-15 23:14:11 - [react_agent.py:345:call] - answer: Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought=\"I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\", action=\"search('Big Stone Gap director')\", fun_name='search', fun_args=['Big Stone Gap director'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Union High School (Big Stone Gap, Virginia)', 'Star Wood', 'Dagmara Domińczyk']\"), StepOutput(step=2, thought=\"I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\", action=\"search('Big Stone Gap film location')\", fun_name='search', fun_args=['Big Stone Gap film location'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\"), StepOutput(step=3, thought=\"I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\", action=\"search('Big Stone Gap film location')\", fun_name='search', fun_args=['Big Stone Gap film location'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\")]\n",
- "\u001b[33m2024-06-15 23:14:11 - [2706144185.py:12:] - question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: could not find exact page for big stone gap film location similar topics big stone gap virginia adriana trigiani dagmara domińczyk big lebowski beach film\u001b[0m\n"
- ]
- }
- ],
- "source": [
- "from adalflow.utils.logger import printc\n",
- "\n",
- "num_questions = 5\n",
- "for i in range(num_questions):\n",
- " question = val_dataset[i][\"question\"]\n",
- " gt_answer = normalize_answer(\n",
- " val_dataset[i][\"answer\"]\n",
- " ) # normalize the ground truth answer\n",
- "\n",
- " # get the agent's response\n",
- " pred_answer = agent(question)\n",
- " pred_answer = normalize_answer(pred_answer)\n",
- "\n",
- " printc(\n",
- " f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n",
- " color=\"yellow\",\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 6. Evaluation\n",
- "\n",
- "Now you will see that we have the ``exact correct answer`` for some questions:\n",
- "\n",
- "question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: ``yes`` pred answer: ``yes``\n",
- "\n",
- "question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: ``animorphs``, pred answer: ``animorphs``\n",
- "\n",
- "Sometimes the agent answers correctly but not in the same format as the ground truth. E.g. ground truth: ``no``, pred answer: ``no, they are not the same``. This kind of mismatch is something we can tolerate.\n",
- "\n",
- "But how do we tell whether the agent is doing well, or whether our tools, examples, and prompt implementations work well? We need to evaluate it.\n",
- "\n",
- "1. Exact Match (EM)\n",
- "Exact Match is the metric the paper uses. We count a response as correct only when the normalized agent response is identical to the ground truth answer. The paper's EM for the ReAct agent is around 30% (GPT-3).\n",
- "\n",
- "2. Fuzzy Match (FM)\n",
- "EM is too strict for question answering, because LLMs tend to produce lengthy outputs. So we propose fuzzy match: if the ground truth answer is included in the agent response, we count it as correct. A fuzzy match does not guarantee that the answer is actually correct. \n",
- "E.g. question: Harry Potter and Dumbledore, who is older? ground truth: ``dumbledore``, pred answer: ``harry potter is older than dumbledore.``\n",
- "The model mentioned the ground truth but still provided the wrong answer. So FM serves only as a reference.\n",
- "\n",
- "Let's use the ``LightRAG eval`` module to evaluate on 10 questions, keeping the model's practice of setting ``max_step`` to `7`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[36m2024-06-15 23:15:11 - [react_agent.py:330:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:13 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.\n",
- "Action 1: search(\"Scott Derrickson\")\n",
- "Observation 1: Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:15 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.\n",
- "Action 2: search(\"Ed Wood\")\n",
- "Observation 2: Edward Davis Wood Jr. (October 10, 1924 – December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:15 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: Scott Derrickson is an American filmmaker and Ed Wood is also American. So, they are of the same nationality.\n",
- "Action 3: finish(\"yes\")\n",
- "Observation 3: yes\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:15 - [react_agent.py:345:call] - answer: yes\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.', action='search(\"Scott Derrickson\")', fun_name='search', fun_args=['Scott Derrickson'], fun_kwargs={}, observation='Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.'), StepOutput(step=2, thought='Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.', action='search(\"Ed Wood\")', fun_name='search', fun_args=['Ed Wood'], fun_kwargs={}, observation=\"Edward Davis Wood Jr. (October 10, 1924\\xa0– December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. 
Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\"), StepOutput(step=3, thought='Scott Derrickson is an American filmmaker and Ed Wood is also American. So, they are of the same nationality.', action='finish(\"yes\")', fun_name='finish', fun_args=['yes'], fun_kwargs={}, observation='yes')]\n",
- "\u001b[33m2024-06-15 23:15:15 - [3641068398.py:26:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: yes\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:15 - [react_agent.py:330:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:17 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.\n",
- "Action 1: search(\"Corliss Archer Kiss and Tell film\")\n",
- "Observation 1: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:19 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n",
- "Action 2: search(\"Corliss Archer Kiss and Tell film\")\n",
- "Observation 2: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['Corliss Archer', 'A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\",\n",
- " \"action\": \"search(\\\"Corliss Archer Kiss and Tell film\\\")\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:15:21 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n",
- "Action 3: search(\"Corliss Archer Kiss and Tell film\")\n",
- "Observation 3: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:23 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n",
- "Action 4: search(\"Corliss Archer film Kiss and Tell cast\")\n",
- "Observation 4: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:25 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n",
- "Action 5: search(\"Corliss Archer film Kiss and Tell cast\")\n",
- "Observation 5: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:27 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n",
- "Action 6: search(\"Corliss Archer film Kiss and Tell cast\")\n",
- "Observation 6: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:29 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n",
- "Action 7: search(\"Corliss Archer film Kiss and Tell cast\")\n",
- "Observation 7: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:29 - [react_agent.py:345:call] - answer: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=2, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['Corliss Archer', 'A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=3, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=4, thought='I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. 
Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\"), StepOutput(step=5, thought='I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\"), StepOutput(step=6, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\"), StepOutput(step=7, thought='I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\")]\n",
- "\u001b[33m2024-06-15 23:15:29 - [3641068398.py:26:] - No. 2, question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: could not find exact page for corliss archer film kiss and tell cast similar topics kiss for corliss kiss and tell 1945 film kiss and tell play janet waldo kathryn card\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:29 - [react_agent.py:330:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:32 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for a science fantasy young adult series told in first person and involving companion books about enslaved worlds and alien species.\n",
- "Action 1: search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")\n",
- "Observation 1: Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:34 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The search did not return an exact match. I should search for 'Animorphs' as it involves young adult science fantasy, first-person narration, enslaved worlds, and alien species.\n",
- "Action 2: search('Animorphs')\n",
- "Observation 2: Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:35 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The series that matches the description is Animorphs. I should provide this as the answer.\n",
- "Action 3: finish(\"Animorphs\")\n",
- "Observation 3: Animorphs\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:35 - [react_agent.py:345:call] - answer: Animorphs\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for a science fantasy young adult series told in first person and involving companion books about enslaved worlds and alien species.', action='search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")', fun_name='search', fun_args=['science fantasy young adult series first person companion books enslaved worlds alien species'], fun_kwargs={}, observation=\"Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\"), StepOutput(step=2, thought=\"The search did not return an exact match. I should search for 'Animorphs' as it involves young adult science fantasy, first-person narration, enslaved worlds, and alien species.\", action=\"search('Animorphs')\", fun_name='search', fun_args=['Animorphs'], fun_kwargs={}, observation=\"Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\"), StepOutput(step=3, thought='The series that matches the description is Animorphs. 
I should provide this as the answer.', action='finish(\"Animorphs\")', fun_name='finish', fun_args=['Animorphs'], fun_kwargs={}, observation='Animorphs')]\n",
- "\u001b[33m2024-06-15 23:15:35 - [3641068398.py:26:] - No. 3, question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: animorphs\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:35 - [react_agent.py:330:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:36 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.\n",
- "Action 1: search(\"Laleli Mosque\")\n",
- "Observation 1: The Laleli Mosque (Turkish: Laleli Camii, lit. 'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. 
The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:37 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.\n",
- "Action 2: search(\"Esma Sultan Mansion\")\n",
- "Observation 2: The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:39 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.\n",
- "Action 3: finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")\n",
- "Observation 3: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:39 - [react_agent.py:345:call] - answer: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.', action='search(\"Laleli Mosque\")', fun_name='search', fun_args=['Laleli Mosque'], fun_kwargs={}, observation=\"The Laleli Mosque (Turkish: Laleli Camii, lit.\\u2009'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. 
The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\"), StepOutput(step=2, thought='I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.', action='search(\"Esma Sultan Mansion\")', fun_name='search', fun_args=['Esma Sultan Mansion'], fun_kwargs={}, observation='The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. 
It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].'), StepOutput(step=3, thought='I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.', action='finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")', fun_name='finish', fun_args=['No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.'], fun_kwargs={}, observation='No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.')]\n",
- "\u001b[33m2024-06-15 23:15:39 - [3641068398.py:26:] - No. 4, question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque is located in laleli fatih istanbul turkey while esma sultan mansion is in ortaköy neighborhood of istanbul turkey\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:39 - [react_agent.py:330:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:43 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\n",
- "Action 1: search('Big Stone Gap film director')\n",
- "Observation 1: Could not find exact page for 'Big Stone Gap film director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'C. C. Swiney']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:45 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n",
- "Action 2: search('Big Stone Gap film')\n",
- "Observation 2: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n",
- " \"action\": \"search('Big Stone Gap film')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:15:47 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n",
- "Action 3: search('Big Stone Gap film')\n",
- "Observation 3: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n",
- " \"action\": \"search('Big Stone Gap film')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:15:49 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n",
- "Action 4: search('Big Stone Gap film')\n",
- "Observation 4: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n",
- " \"action\": \"search('Big Stone Gap film')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:15:51 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n",
- "Action 5: search('Big Stone Gap film')\n",
- "Observation 5: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n",
- " \"action\": \"search('Big Stone Gap film')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:15:53 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n",
- "Action 6: search('Big Stone Gap film')\n",
- "Observation 6: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n",
- " \"action\": \"search('Big Stone Gap film')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:15:54 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n",
- "Action 7: search('Big Stone Gap film')\n",
- "Observation 7: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:54 - [react_agent.py:345:call] - answer: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought=\"I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\", action=\"search('Big Stone Gap film director')\", fun_name='search', fun_args=['Big Stone Gap film director'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'C. C. Swiney']\"), StepOutput(step=2, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=3, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=4, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. 
Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=5, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=6, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=7, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\")]\n",
- "\u001b[33m2024-06-15 23:15:54 - [3641068398.py:26:] - No. 5, question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: could not find exact page for big stone gap film similar topics big stone gap film big stone gap virginia adriana trigiani dagmara domińczyk darién gap\u001b[0m\n",
- "\u001b[36m2024-06-15 23:15:54 - [react_agent.py:330:call] - input_query: 2014 S/S is the debut album of a South Korean boy group that was formed by who?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:57 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the debut album 2014 S/S of a South Korean boy group and find out who formed the group.\n",
- "Action 1: search(\"2014 S/S South Korean boy group debut album\")\n",
- "Observation 1: Could not find exact page for '2014 S/S South Korean boy group debut album'. Similar topics: ['2014 S/S', 'S.Coups', 'S (South Korean band)', 'List of South Korean boy bands', 'Kara (South Korean group)']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:15:58 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: To find out who formed the group, I need to search for the South Korean boy group's debut album 2014 S/S and look for information on the group's formation.\n",
- "Action 2: search('S (South Korean band)')\n",
- "Observation 2: S (에스) is a South Korean project group consisting of three members: Kangta, Lee Ji-hoon and Shin Hye-sung. The group debuted in 2003, under the SM Entertainment label. After 11 years, they released and promoted another mini-album in 2014.. In 2003, S released their first album Fr.In.Cl, which stands for Friends in Classic. In 2014, after more than a decade in hiatus, the group released their second mini-album Autumn Breeze on October 27.\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"To find out who formed the South Korean boy group that debuted with the album 2014 S/S, I need to search for the album's details and look for information on the group's formation.\",\n",
- " \"action\": \"search('2014 S/S (album)')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:16:00 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: To find out who formed the South Korean boy group that debuted with the album 2014 S/S, I need to search for the album's details and look for information on the group's formation.\n",
- "Action 3: search('2014 S/S (album)')\n",
- "Observation 3: 2014 S/S is the debut album of South Korean group Winner. It was released on August 12, 2014, by the group's record label, YG Entertainment. The members were credited for writing the lyrics and composing the majority of the album's songs.[1]. The members produced the majority of the music for this album themselves, with the help of other producers such as Choice 37, B.I., Airplay, and others.[2] The album was highlighted for incorporating elements generally absent from K-pop releases, including hints of acoustic and alternative rock.[3][4]. The title track \"Empty\" became a hit in South Korea, topping the Gaon Chart and Billboard's K-Pop Hot 100 chart.[5][6] Internationally, 2014 S/S took the top spot in Billboard's World Album Chart.[7].\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to search for Winner's formation details.\",\n",
- " \"action\": \"search('Winner (South Korean band)')\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:16:02 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to search for Winner's formation details.\n",
- "Action 4: search('Winner (South Korean band)')\n",
- "Observation 4: Winner (Korean: 위너; romanized: wineo; stylized in all caps) is a South Korean boy band consisting of four members: Jinu, Hoony, Mino and Seungyoon. The band was formed in 2013 by YG Entertainment through Mnet's survival program WIN: Who Is Next leading to their official debut on August 17, 2014, with studio album 2014 S/S. Originally a five-piece band, Taehyun departed in November 2016 for his mental health and well-being. Upon their debut, The Korea Herald cited their immediate success as \"unprecedented\" for a new group, thus dubbed as \"Monster rookies\". Winner is often cited as a \"self-producing\" band with all members actively taking on roles from producing and choreographing to designing and marketing.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:03 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to provide the name of the group that formed Winner.\n",
- "Action 5: finish(\"YG Entertainment\")\n",
- "Observation 5: YG Entertainment\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:03 - [react_agent.py:345:call] - answer: YG Entertainment\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for the debut album 2014 S/S of a South Korean boy group and find out who formed the group.', action='search(\"2014 S/S South Korean boy group debut album\")', fun_name='search', fun_args=['2014 S/S South Korean boy group debut album'], fun_kwargs={}, observation=\"Could not find exact page for '2014 S/S South Korean boy group debut album'. Similar topics: ['2014 S/S', 'S.Coups', 'S (South Korean band)', 'List of South Korean boy bands', 'Kara (South Korean group)']\"), StepOutput(step=2, thought=\"To find out who formed the group, I need to search for the South Korean boy group's debut album 2014 S/S and look for information on the group's formation.\", action=\"search('S (South Korean band)')\", fun_name='search', fun_args=['S (South Korean band)'], fun_kwargs={}, observation='S (에스) is a South Korean project group consisting of three members: Kangta, Lee Ji-hoon and Shin Hye-sung. The group debuted in 2003, under the SM Entertainment label. After 11 years, they released and promoted another mini-album in 2014.. In 2003, S released their first album Fr.In.Cl, which stands for Friends in Classic. In 2014, after more than a decade in hiatus, the group released their second mini-album Autumn Breeze on October 27.'), StepOutput(step=3, thought=\"To find out who formed the South Korean boy group that debuted with the album 2014 S/S, I need to search for the album's details and look for information on the group's formation.\", action=\"search('2014 S/S (album)')\", fun_name='search', fun_args=['2014 S/S (album)'], fun_kwargs={}, observation='2014 S/S is the debut album of South Korean group Winner. It was released on August 12, 2014, by the group\\'s record label, YG Entertainment. The members were credited for writing the lyrics and composing the majority of the album\\'s songs.[1]. 
The members produced the majority of the music for this album themselves, with the help of other producers such as Choice 37, B.I., Airplay, and others.[2] The album was highlighted for incorporating elements generally absent from K-pop releases, including hints of acoustic and alternative rock.[3][4]. The title track \"Empty\" became a hit in South Korea, topping the Gaon Chart and Billboard\\'s K-Pop Hot 100 chart.[5][6] Internationally, 2014 S/S took the top spot in Billboard\\'s World Album Chart.[7].'), StepOutput(step=4, thought=\"To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to search for Winner's formation details.\", action=\"search('Winner (South Korean band)')\", fun_name='search', fun_args=['Winner (South Korean band)'], fun_kwargs={}, observation='Winner (Korean:\\xa0위너; romanized: wineo; stylized in all caps) is a South Korean boy band consisting of four members: Jinu, Hoony, Mino and Seungyoon. The band was formed in 2013 by YG Entertainment through Mnet\\'s survival program WIN: Who Is Next leading to their official debut on August 17, 2014, with studio album 2014 S/S. Originally a five-piece band, Taehyun departed in November 2016 for his mental health and well-being. Upon their debut, The Korea Herald cited their immediate success as \"unprecedented\" for a new group, thus dubbed as \"Monster rookies\". Winner is often cited as a \"self-producing\" band with all members actively taking on roles from producing and choreographing to designing and marketing.'), StepOutput(step=5, thought='To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to provide the name of the group that formed Winner.', action='finish(\"YG Entertainment\")', fun_name='finish', fun_args=['YG Entertainment'], fun_kwargs={}, observation='YG Entertainment')]\n",
- "\u001b[33m2024-06-15 23:16:03 - [3641068398.py:26:] - No. 6, question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?, ground truth: yg entertainment, pred answer: yg entertainment\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:03 - [react_agent.py:330:call] - input_query: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:05 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to find the person known by the stage name Aladin and who worked as a consultant to help organizations improve their performance.\n",
- "Action 1: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 1: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:07 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I should try searching for Aladin's stage name and his work as a consultant to help organizations improve their performance.\n",
- "Action 2: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 2: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:08 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n",
- "Action 3: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 3: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:10 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n",
- "Action 4: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 4: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:12 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n",
- "Action 5: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 5: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:13 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n",
- "Action 6: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 6: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:15 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n",
- "Action 7: search(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 7: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:15 - [react_agent.py:345:call] - answer: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to find the person known by the stage name Aladin and who worked as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=2, thought=\"I should try searching for Aladin's stage name and his work as a consultant to help organizations improve their performance.\", action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=3, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. 
Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=4, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=5, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=6, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. 
Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=7, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\")]\n",
- "\u001b[33m2024-06-15 23:16:15 - [3641068398.py:26:] - No. 7, question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?, ground truth: eenasul fateh, pred answer: could not find exact page for aladin stage name consultant organizations performance similar topics 2009 in film bret gilliam\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:15 - [react_agent.py:330:call] - input_query: The arena where the Lewiston Maineiacs played their home games can seat how many people?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:17 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the arena where the Lewiston Maineiacs played their home games and find out its seating capacity.\n",
- "Action 1: search(\"Lewiston Maineiacs arena\")\n",
- "Observation 1: Could not find exact page for 'Lewiston Maineiacs arena'. Similar topics: ['Lewiston Maineiacs', 'The Colisée', 'Centre Agnico Eagle', 'Sherbrooke Castors', 'Trois-Rivières Draveurs']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:18 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the seating capacity of the arena where the Lewiston Maineiacs played their home games.\n",
- "Action 2: search(\"The Colisée seating capacity\")\n",
- "Observation 2: Could not find exact page for 'The Colisée seating capacity'. Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'List of indoor arenas in the United States', 'Colosseum', 'List of indoor arenas in Canada']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:20 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the seating capacity of the Colisée de Québec where the Lewiston Maineiacs played their home games.\n",
- "Action 3: search(\"Colisée de Québec seating capacity\")\n",
- "Observation 3: Could not find exact page for 'Colisée de Québec seating capacity'. Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'Videotron Centre', 'List of indoor arenas by capacity', 'List of indoor arenas in Canada']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:22 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n",
- "Action 4: search(\"Videotron Centre seating capacity\")\n",
- "Observation 4: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:24 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n",
- "Action 5: search(\"Videotron Centre seating capacity\")\n",
- "Observation 5: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:25 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n",
- "Action 6: search(\"Videotron Centre seating capacity\")\n",
- "Observation 6: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:27 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n",
- "Action 7: search(\"Videotron Centre seating capacity\")\n",
- "Observation 7: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:27 - [react_agent.py:345:call] - answer: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for the arena where the Lewiston Maineiacs played their home games and find out its seating capacity.', action='search(\"Lewiston Maineiacs arena\")', fun_name='search', fun_args=['Lewiston Maineiacs arena'], fun_kwargs={}, observation=\"Could not find exact page for 'Lewiston Maineiacs arena'. Similar topics: ['Lewiston Maineiacs', 'The Colisée', 'Centre Agnico Eagle', 'Sherbrooke Castors', 'Trois-Rivières Draveurs']\"), StepOutput(step=2, thought='I need to search for the seating capacity of the arena where the Lewiston Maineiacs played their home games.', action='search(\"The Colisée seating capacity\")', fun_name='search', fun_args=['The Colisée seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'The Colisée seating capacity'. Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'List of indoor arenas in the United States', 'Colosseum', 'List of indoor arenas in Canada']\"), StepOutput(step=3, thought='I need to search for the seating capacity of the Colisée de Québec where the Lewiston Maineiacs played their home games.', action='search(\"Colisée de Québec seating capacity\")', fun_name='search', fun_args=['Colisée de Québec seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Colisée de Québec seating capacity'. Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'Videotron Centre', 'List of indoor arenas by capacity', 'List of indoor arenas in Canada']\"), StepOutput(step=4, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. 
Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\"), StepOutput(step=5, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\"), StepOutput(step=6, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\"), StepOutput(step=7, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\")]\n",
- "\u001b[33m2024-06-15 23:16:27 - [3641068398.py:26:] - No. 8, question: The arena where the Lewiston Maineiacs played their home games can seat how many people?, ground truth: 3677 seated, pred answer: could not find exact page for videotron centre seating capacity similar topics videotron centre list of ice hockey arenas by capacity colisée de québec list of indoor arenas by capacity potential national hockey league expansion\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:27 - [react_agent.py:330:call] - input_query: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:28 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the ages of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 1: search(\"Annie Morton\")\n",
- "Observation 1: Annie Morton (born October 8, 1970) is an American model[1] born in Pennsylvania.[2] She has appeared on the covers of British Vogue, ID, Marie Claire, and other magazines. She has been photographed by Helmut Newton; Peter Lindbergh; Annie Leibovitz; Richard Avedon; Juergen Teller;[3] Paul Jasmin, Mary Ellen Mark, Stephen Shames, and Terry Richardson, and modeled for Donna Karan,[4] Givenchy, Guerlain, Chanel, Harper's Bazaar, Sports Illustrated and Victoria's Secret.[5] A long time vegetarian, an advocate for organic lifestyle choices and natural healthcare. She co-founded Tsi-La Organics, a \"Green Luxury\" company that creates and sells vegan, organic perfume and skin care products.[6]. She has appeared on many magazine covers and has been featured in several professional photobooks, Peter Lindbergh Selected Work, the cover of Juergen Teller By Juergen Teller, Helmut Newton Pages From The Glossies, and Donna Karan's book A Journey Of A Woman 20 Years.[7] In 1997 she posed for the Pirelli Calendar with Richard Avedon.. Morton has also appeared in several music videos.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:29 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the age of Terry Richardson to compare it with Annie Morton's age.\n",
- "Action 2: search(\"Terry Richardson\")\n",
- "Observation 2: Terrence Richardson (born August 14, 1965) is an American fashion and portrait photographer. He has shot advertising campaigns for Marc Jacobs, Aldo, Supreme, Sisley, Tom Ford, and Yves Saint Laurent among others, and also done work for magazines such as Rolling Stone, GQ, Vogue, Vanity Fair, Harper's Bazaar, i-D, and Vice.. Since 2001, Richardson has been accused by multiple models of sexual misconduct, although numerous other models have defended him.[2][3][4][5] In 2017, brands and magazines that had worked with Richardson in the past began distancing themselves from him, and said they would no longer employ him.[6] He has not actively worked as a photographer since 2018.[7]. Richardson was born in New York City, the son of Norma Kessler, an actress,[8][9] and Bob Richardson, a fashion photographer who struggled with schizophrenia and drug abuse.[10] Richardson's father was Irish Catholic and his mother is Jewish.[11] Following the divorce of his parents, Richardson moved to Woodstock, New York, with his mother and stepfather, English guitarist Jackie Lomax.[8] Richardson later moved to the Hollywood neighborhood of Los Angeles, where he attended Hollywood High School.[12]. He moved with his mother to Ojai, California, where he attended Nordhoff High School, when he was 16.[13] Richardson originally wanted to be a punk rock musician rather than a photographer.[13] He played bass guitar in the punk rock band The Invisible Government for four years.[14] He played bass for a variety of other punk bands in Southern California including Signal Street Alcoholics, Doggy Style, Baby Fist and Middle Finger.[8][15].\u001b[0m\n",
- "Error executing finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n",
- "\u001b[34m2024-06-15 23:16:31 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.\n",
- "Action 3: finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\")\n",
- "Observation 3: Error executing finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n",
- "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n",
- "\u001b[34m2024-06-15 23:16:32 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.\n",
- "Action 4: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n",
- "Observation 4: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n",
- "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n",
- "\u001b[34m2024-06-15 23:16:33 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.\n",
- "Action 5: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n",
- "Observation 5: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n",
- "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n",
- "\u001b[34m2024-06-15 23:16:34 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.\n",
- "Action 6: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n",
- "Observation 6: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n",
- "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n",
- "\u001b[34m2024-06-15 23:16:35 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to compare the birth dates of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 7: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n",
- "Observation 7: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:35 - [react_agent.py:345:call] - answer: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for the ages of Annie Morton and Terry Richardson to determine who is older.', action='search(\"Annie Morton\")', fun_name='search', fun_args=['Annie Morton'], fun_kwargs={}, observation='Annie Morton (born October 8, 1970) is an American model[1] born in Pennsylvania.[2] She has appeared on the covers of British Vogue, ID, Marie Claire, and other magazines. She has been photographed by Helmut Newton; Peter Lindbergh; Annie Leibovitz; Richard Avedon; Juergen Teller;[3] Paul Jasmin, Mary Ellen Mark, Stephen Shames, and Terry Richardson, and modeled for Donna Karan,[4] Givenchy, Guerlain, Chanel, Harper\\'s Bazaar, Sports Illustrated and Victoria\\'s Secret.[5] A long time vegetarian, an advocate for organic lifestyle choices and natural healthcare. She co-founded Tsi-La Organics, a \"Green Luxury\" company that creates and sells vegan, organic perfume and skin care products.[6]. She has appeared on many magazine covers and has been featured in several professional photobooks, Peter Lindbergh Selected Work, the cover of Juergen Teller By Juergen Teller, Helmut Newton Pages From The Glossies, and Donna Karan\\'s book A Journey Of A Woman 20 Years.[7] In 1997 she posed for the Pirelli Calendar with Richard Avedon.. Morton has also appeared in several music videos.'), StepOutput(step=2, thought=\"I need to search for the age of Terry Richardson to compare it with Annie Morton's age.\", action='search(\"Terry Richardson\")', fun_name='search', fun_args=['Terry Richardson'], fun_kwargs={}, observation=\"Terrence Richardson (born August 14, 1965) is an American fashion and portrait photographer. He has shot advertising campaigns for Marc Jacobs, Aldo, Supreme, Sisley, Tom Ford, and Yves Saint Laurent among others, and also done work for magazines such as Rolling Stone, GQ, Vogue, Vanity Fair, Harper's Bazaar, i-D, and Vice.. 
Since 2001, Richardson has been accused by multiple models of sexual misconduct, although numerous other models have defended him.[2][3][4][5] In 2017, brands and magazines that had worked with Richardson in the past began distancing themselves from him, and said they would no longer employ him.[6] He has not actively worked as a photographer since 2018.[7]. Richardson was born in New York City, the son of Norma Kessler, an actress,[8][9] and Bob Richardson, a fashion photographer who struggled with schizophrenia and drug abuse.[10] Richardson's father was Irish Catholic and his mother is Jewish.[11] Following the divorce of his parents, Richardson moved to Woodstock, New York, with his mother and stepfather, English guitarist Jackie Lomax.[8] Richardson later moved to the Hollywood neighborhood of Los Angeles, where he attended Hollywood High School.[12]. He moved with his mother to Ojai, California, where he attended Nordhoff High School, when he was 16.[13] Richardson originally wanted to be a punk rock musician rather than a photographer.[13] He played bass guitar in the punk rock band The Invisible Government for four years.[14] He played bass for a variety of other punk bands in Southern California including Signal Street Alcoholics, Doggy Style, Baby Fist and Middle Finger.[8][15].\"), StepOutput(step=3, thought='I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=4, thought='I have found the birth dates of Annie Morton and Terry Richardson. 
Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=5, thought='I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=6, thought='I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=7, thought='I need to compare the birth dates of Annie Morton and Terry Richardson to determine who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)')]\n",
- "\u001b[33m2024-06-15 23:16:35 - [3641068398.py:26:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: terry richardson, pred answer: error executing finishannie morton if born before terry richardson else finishterry richardson expected else after if expression unknown line 1\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:35 - [react_agent.py:330:call] - input_query: Are Local H and For Against both from the United States?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:36 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for Local H and For Against to find out if they are both from the United States.\n",
- "Action 1: search(\"Local H\")\n",
- "Observation 1: Local H is an American rock band originally formed by guitarist and vocalist Scott Lucas, bassist Joe Pletcher, drummer Joe Daniels, and lead guitarist John Sparkman in Zion, Illinois in 1990. The members all met in high school in 1987 and founded Local H three years later. After Sparkman's departure in 1991 and Garcia's departure in 1993, Local H continued as an unorthodox two-piece setup.. Local H signed a record contract with Island Records in 1994, where they would go on to release three albums. The band's debut album, Ham Fisted (1995), was not a success and the band was nearly dropped, but the band remained on the label long enough to release their second album As Good as Dead (1996).\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:39 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for For Against to find out if they are from the United States.\n",
- "Action 2: search(\"For Against\")\n",
- "Observation 2: For Against is an American post-punk/dream pop band from Lincoln, Nebraska, United States. Despite numerous lineup shuffles and some periods of dormancy, the band has produced material steadily since 1984.. Something of an anomaly in Midwestern America, For Against—especially early in its career—has looked to a distinctly British aesthetic for inspiration, most obviously the post-punk scene exemplified by the early rosters of the Factory and 4AD record labels.. Featuring members of underground Lincoln bands Cartoon Pupils and Hymn to Joy, the band first called itself Glue, and then D.B.L., finally settling on Four Against One,[1] a name that reflected its status as a quintet at the time. Bassist Liz Panarelli's early departure gave way to keyboardist Jeffrey Runnings' shift to bass, as well as the band's truncation of its name to For Against.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:16:40 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: Both Local H and For Against are from the United States.\n",
- "Action 3: finish(\"Yes, both Local H and For Against are from the United States.\")\n",
- "Observation 3: Yes, both Local H and For Against are from the United States.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:16:40 - [react_agent.py:345:call] - answer: Yes, both Local H and For Against are from the United States.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I need to search for Local H and For Against to find out if they are both from the United States.', action='search(\"Local H\")', fun_name='search', fun_args=['Local H'], fun_kwargs={}, observation=\"Local H is an American rock band originally formed by guitarist and vocalist Scott Lucas, bassist Joe Pletcher, drummer Joe Daniels, and lead guitarist John Sparkman in Zion, Illinois in 1990. The members all met in high school in 1987 and founded Local H three years later. After Sparkman's departure in 1991 and Garcia's departure in 1993, Local H continued as an unorthodox two-piece setup.. Local H signed a record contract with Island Records in 1994, where they would go on to release three albums. The band's debut album, Ham Fisted (1995), was not a success and the band was nearly dropped, but the band remained on the label long enough to release their second album As Good as Dead (1996).\"), StepOutput(step=2, thought='I need to search for For Against to find out if they are from the United States.', action='search(\"For Against\")', fun_name='search', fun_args=['For Against'], fun_kwargs={}, observation=\"For Against is an American post-punk/dream pop band from Lincoln, Nebraska, United States. Despite numerous lineup shuffles and some periods of dormancy, the band has produced material steadily since 1984.. Something of an anomaly in Midwestern America, For Against—especially early in its career—has looked to a distinctly British aesthetic for inspiration, most obviously the post-punk scene exemplified by the early rosters of the Factory and 4AD record labels.. Featuring members of underground Lincoln bands Cartoon Pupils and Hymn to Joy, the band first called itself Glue, and then D.B.L., finally settling on Four Against One,[1] a name that reflected its status as a quintet at the time. 
Bassist Liz Panarelli's early departure gave way to keyboardist Jeffrey Runnings' shift to bass, as well as the band's truncation of its name to For Against.\"), StepOutput(step=3, thought='Both Local H and For Against are from the United States.', action='finish(\"Yes, both Local H and For Against are from the United States.\")', fun_name='finish', fun_args=['Yes, both Local H and For Against are from the United States.'], fun_kwargs={}, observation='Yes, both Local H and For Against are from the United States.')]\n",
- "\u001b[33m2024-06-15 23:16:40 - [3641068398.py:26:] - No. 10, question: Are Local H and For Against both from the United States?, ground truth: yes, pred answer: yes both local h and for against are from united states\u001b[0m\n",
- "EM = (0.3, [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]), FM = (0.6, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0]), average time = 8.897640180587768\n"
- ]
- }
- ],
- "source": [
- "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
- "\n",
- "# set up evaluation type\n",
- "EM_evaluator = AnswerMatchAcc(type=\"exact_match\")\n",
- "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n",
- "\n",
- "agent = ReActAgent(\n",
- " tools=tools,\n",
- " max_steps=7,\n",
- " model_client=OpenAIClient(),\n",
- " model_kwargs=gpt_model_kwargs,\n",
- " preset_prompt_kwargs=preset_prompt_kwargs,\n",
- ")\n",
- "\n",
- "num_questions = 10\n",
- "gt_answers = []\n",
- "pred_answers = []\n",
- "start_time = time.time()\n",
- "for i in range(num_questions):\n",
- " question = val_dataset[i][\"question\"]\n",
- " gt_answer = normalize_answer(\n",
- " val_dataset[i][\"answer\"]\n",
- " ) # normalize the ground truth answer\n",
- " gt_answers.append(gt_answer)\n",
- "\n",
- " # get the agent's response\n",
- " pred_answer = agent(question)\n",
- " pred_answer = normalize_answer(pred_answer)\n",
- " pred_answers.append(pred_answer)\n",
- "\n",
- " printc(\n",
- " f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n",
- " color=\"yellow\",\n",
- " )\n",
- "\n",
- "end_time = time.time()\n",
- "\n",
- "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
- "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
- "avg_time = (end_time - start_time) / num_questions\n",
- "\n",
- "print(f\"EM = {em}, FM = {fm}, average time = {avg_time}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The EM is 0.3 and the FM is 0.6. Each query takes about 9s on average. (The performance also depends on whether the Wikipedia queries connect successfully.)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "What if we use a plain LLM to answer these questions? To test this, we just need to remove the tools. We have built-in ``llm_tool`` and ``finish`` tools that automatically handle the query. ``llm_tool`` uses the same model as the agent. We can't add the examples because they would mislead the agent into using non-existent tools."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\u001b[36m2024-06-15 23:17:00 - [react_agent.py:330:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:01 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The nationality of Scott Derrickson and Ed Wood needs to be determined.\n",
- "Action 1: llm_tool(\"What is the nationality of Scott Derrickson and Ed Wood?\")\n",
- "Observation 1: What is the nationality of Scott Derrickson and Ed Wood?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:03 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is asking if Scott Derrickson and Ed Wood were of the same nationality.\n",
- "Action 2: llm_tool(\"Were Scott Derrickson and Ed Wood of the same nationality?\")\n",
- "Observation 2: No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:04 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user query has been successfully answered.\n",
- "Action 3: finish(\"No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\")\n",
- "Observation 3: No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:04 - [react_agent.py:345:call] - answer: No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='The nationality of Scott Derrickson and Ed Wood needs to be determined.', action='llm_tool(\"What is the nationality of Scott Derrickson and Ed Wood?\")', fun_name='llm_tool', fun_args=['What is the nationality of Scott Derrickson and Ed Wood?'], fun_kwargs={}, observation='What is the nationality of Scott Derrickson and Ed Wood?'), StepOutput(step=2, thought='The user is asking if Scott Derrickson and Ed Wood were of the same nationality.', action='llm_tool(\"Were Scott Derrickson and Ed Wood of the same nationality?\")', fun_name='llm_tool', fun_args=['Were Scott Derrickson and Ed Wood of the same nationality?'], fun_kwargs={}, observation='No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.'), StepOutput(step=3, thought='The user query has been successfully answered.', action='finish(\"No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\")', fun_name='finish', fun_args=['No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.'], fun_kwargs={}, observation='No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.')]\n",
- "\u001b[33m2024-06-15 23:17:04 - [3230041225.py:26:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: no scott derrickson and ed wood were not of same nationality scott derrickson is american while ed wood was also american\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:04 - [react_agent.py:330:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:06 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I will use llm_tool to answer this question.\n",
- "Action 1: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n",
- "Observation 1: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:08 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I will use llm_tool to quickly find the answer.\n",
- "Action 2: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n",
- "Observation 2: The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:12 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I will use llm_tool to quickly provide the answer.\n",
- "Action 3: llm_tool(\"Shirley Temple held the government position of U.S. Representative.\")\n",
- "Observation 3: Shirley Temple did not hold the government position of U.S. Representative. She was a famous child actress and diplomat, but she did not serve as a U.S. Representative.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:13 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I will use llm_tool to quickly find the answer.\n",
- "Action 4: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n",
- "Observation 4: The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:15 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I will use llm_tool to quickly provide the answer.\n",
- "Action 5: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n",
- "Observation 5: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:17 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I will use llm_tool to quickly find the answer to the user's query.\n",
- "Action 6: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n",
- "Observation 6: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:18 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I will use llm_tool to quickly find the answer to the user's query.\n",
- "Action 7: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n",
- "Observation 7: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:18 - [react_agent.py:345:call] - answer: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I will use llm_tool to answer this question.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?'), StepOutput(step=2, thought='I will use llm_tool to quickly find the answer.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.'), StepOutput(step=3, thought='I will use llm_tool to quickly provide the answer.', action='llm_tool(\"Shirley Temple held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['Shirley Temple held the government position of U.S. Representative.'], fun_kwargs={}, observation='Shirley Temple did not hold the government position of U.S. Representative. She was a famous child actress and diplomat, but she did not serve as a U.S. Representative.'), StepOutput(step=4, thought='I will use llm_tool to quickly find the answer.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. 
Representative.'], fun_kwargs={}, observation='The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.'), StepOutput(step=5, thought='I will use llm_tool to quickly provide the answer.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?'), StepOutput(step=6, thought=\"I will use llm_tool to quickly find the answer to the user's query.\", action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?'), StepOutput(step=7, thought=\"I will use llm_tool to quickly find the answer to the user's query.\", action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?')]\n",
- "\u001b[33m2024-06-15 23:17:18 - [3230041225.py:26:] - No. 2, question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: who is woman who portrayed corliss archer in film kiss and tell\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:18 - [react_agent.py:330:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:20 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.\n",
- "Action 1: llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")\n",
- "Observation 1: That sounds like an exciting series! Do you have a favorite book or character from the series?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:22 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\n",
- "Action 2: llm_tool(\"Science fantasy young adult series with companion books about enslaved worlds and alien species.\")\n",
- "Observation 2: That sounds like an exciting series! Can you tell me more about the main plot or any specific themes in the books?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:24 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking a science fantasy young adult series narrated in the first person with companion books detailing stories of enslaved worlds and alien species.\n",
- "Action 3: llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")\n",
- "Observation 3: Hello! It sounds like you're interested in a science fantasy young adult series with a first-person narration, companion books about enslaved worlds, and alien species. That's a fascinating combination! Do you have a specific question or topic you'd like to discuss about these books?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:27 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.\n",
- "Action 4: llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")\n",
- "Observation 4: It sounds like you're looking for a science fantasy young adult series that is narrated in the first person and includes companion books about enslaved worlds and alien species. One recommendation that fits this description is the \"Red Rising\" series by Pierce Brown. This series follows the story of Darrow, a young man living in a society where people are divided by color and forced to work as slaves for the ruling class. As Darrow discovers the truth about his world and joins a rebellion, he encounters various alien species and explores different enslaved worlds. The series includes multiple companion books that delve deeper into the world and its characters.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:29 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\n",
- "Action 5: llm_tool(\"Red Rising series by Pierce Brown.\")\n",
- "Observation 5: Hello! How can I assist you with the Red Rising series by Pierce Brown?\u001b[0m\n",
- "Trying to fix potential missing commas...\n",
- "Trying to fix scaped single quotes...\n",
- "Fixed JSON string: {\n",
- " \"thought\": \"The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\",\n",
- " \"action\": \"llm_tool(\\\"Red Rising series by Pierce Brown.\\\")\",\n",
- "}\n",
- "Parsing JSON string with PyYAML...\n",
- "\u001b[34m2024-06-15 23:17:30 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\n",
- "Action 6: llm_tool(\"Red Rising series by Pierce Brown.\")\n",
- "Observation 6: Hello! How can I assist you today with the Red Rising series by Pierce Brown?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:31 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.\n",
- "Action 7: llm_tool(\"Red Rising series by Pierce Brown.\")\n",
- "Observation 7: Hello! How can I assist you today with the Red Rising series by Pierce Brown?\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:31 - [react_agent.py:345:call] - answer: Hello! How can I assist you today with the Red Rising series by Pierce Brown?\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation='That sounds like an exciting series! Do you have a favorite book or character from the series?'), StepOutput(step=2, thought='The user is inquiring about a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation='That sounds like an exciting series! Can you tell me more about the main plot or any specific themes in the books?'), StepOutput(step=3, thought='The user is seeking a science fantasy young adult series narrated in the first person with companion books detailing stories of enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation=\"Hello! It sounds like you're interested in a science fantasy young adult series with a first-person narration, companion books about enslaved worlds, and alien species. That's a fascinating combination! 
Do you have a specific question or topic you'd like to discuss about these books?\"), StepOutput(step=4, thought='The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation='It sounds like you\\'re looking for a science fantasy young adult series that is narrated in the first person and includes companion books about enslaved worlds and alien species. One recommendation that fits this description is the \"Red Rising\" series by Pierce Brown. This series follows the story of Darrow, a young man living in a society where people are divided by color and forced to work as slaves for the ruling class. As Darrow discovers the truth about his world and joins a rebellion, he encounters various alien species and explores different enslaved worlds. The series includes multiple companion books that delve deeper into the world and its characters.'), StepOutput(step=5, thought='The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.', action='llm_tool(\"Red Rising series by Pierce Brown.\")', fun_name='llm_tool', fun_args=['Red Rising series by Pierce Brown.'], fun_kwargs={}, observation='Hello! 
How can I assist you with the Red Rising series by Pierce Brown?'), StepOutput(step=6, thought='The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.', action='llm_tool(\"Red Rising series by Pierce Brown.\")', fun_name='llm_tool', fun_args=['Red Rising series by Pierce Brown.'], fun_kwargs={}, observation='Hello! How can I assist you today with the Red Rising series by Pierce Brown?'), StepOutput(step=7, thought='The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.', action='llm_tool(\"Red Rising series by Pierce Brown.\")', fun_name='llm_tool', fun_args=['Red Rising series by Pierce Brown.'], fun_kwargs={}, observation='Hello! How can I assist you today with the Red Rising series by Pierce Brown?')]\n",
- "\u001b[33m2024-06-15 23:17:31 - [3230041225.py:26:] - No. 3, question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: hello how can i assist you today with red rising series by pierce brown\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:31 - [react_agent.py:330:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:33 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I will use llm_tool to quickly answer this question.\n",
- "Action 1: llm_tool(\"Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\")\n",
- "Observation 1: No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:35 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is asking about the location of two specific landmarks.\n",
- "Action 2: finish(\"No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\")\n",
- "Observation 2: No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:35 - [react_agent.py:345:call] - answer: No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I will use llm_tool to quickly answer this question.', action='llm_tool(\"Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\")', fun_name='llm_tool', fun_args=['Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?'], fun_kwargs={}, observation='No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.'), StepOutput(step=2, thought='The user is asking about the location of two specific landmarks.', action='finish(\"No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\")', fun_name='finish', fun_args=['No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.'], fun_kwargs={}, observation='No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.')]\n",
- "\u001b[33m2024-06-15 23:17:35 - [3230041225.py:26:] - No. 4, question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque and esma sultan mansion are not located in same neighborhood laleli mosque is located in laleli neighborhood of istanbul while esma sultan mansion is located in ortaköy neighborhood\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:35 - [react_agent.py:330:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:36 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n",
- "Action 1: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n",
- "Observation 1: Hello! How can I assist you today?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:38 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the specific New York city where the movie 'Big Stone Gap' is based.\n",
- "Action 2: llm_tool(\"New York city where the movie 'Big Stone Gap' is based in\")\n",
- "Observation 2: What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:40 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking information about the specific New York city where the movie 'Big Stone Gap' is based.\n",
- "Action 3: llm_tool(\"New York city where the movie 'Big Stone Gap' is based in\")\n",
- "Observation 3: What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:41 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n",
- "Action 4: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n",
- "Observation 4: Hello! How can I assist you today?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:42 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n",
- "Action 5: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n",
- "Observation 5: Hello! How can I assist you today?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:44 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n",
- "Action 6: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n",
- "Observation 6: Hello! How can I assist you today?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:45 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n",
- "Action 7: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n",
- "Observation 7: Hello! How can I assist you today?\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:45 - [react_agent.py:345:call] - answer: Hello! How can I assist you today?\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=2, thought=\"The user is inquiring about the specific New York city where the movie 'Big Stone Gap' is based.\", action='llm_tool(\"New York city where the movie \\'Big Stone Gap\\' is based in\")', fun_name='llm_tool', fun_args=[\"New York city where the movie 'Big Stone Gap' is based in\"], fun_kwargs={}, observation=\"What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\"), StepOutput(step=3, thought=\"The user is seeking information about the specific New York city where the movie 'Big Stone Gap' is based.\", action='llm_tool(\"New York city where the movie \\'Big Stone Gap\\' is based in\")', fun_name='llm_tool', fun_args=[\"New York city where the movie 'Big Stone Gap' is based in\"], fun_kwargs={}, observation=\"What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\"), StepOutput(step=4, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=5, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! 
How can I assist you today?'), StepOutput(step=6, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=7, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?')]\n",
- "\u001b[33m2024-06-15 23:17:45 - [3230041225.py:26:] - No. 5, question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: hello how can i assist you today\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:45 - [react_agent.py:330:call] - input_query: 2014 S/S is the debut album of a South Korean boy group that was formed by who?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:47 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about the debut album of a South Korean boy group and who formed it.\n",
- "Action 1: llm_tool(\"2014 S/S is the debut album of a South Korean boy group\")\n",
- "Observation 1: What is the name of the South Korean boy group that released the album 2014 S/S?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:49 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the formation of the South Korean boy group that released the album 2014 S/S.\n",
- "Action 2: llm_tool(\"Who formed the South Korean boy group that released the album 2014 S/S?\")\n",
- "Observation 2: The South Korean boy group that released the album 2014 S/S is WINNER.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:50 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking confirmation of the information provided.\n",
- "Action 3: llm_tool(\"Is WINNER the South Korean boy group that released the album 2014 S/S?\")\n",
- "Observation 3: Yes, WINNER is the South Korean boy group that released the album 2014 S/S.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:52 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is looking for the name of the person or group that formed the South Korean boy group WINNER.\n",
- "Action 4: llm_tool(\"Who formed the South Korean boy group WINNER?\")\n",
- "Observation 4: The South Korean boy group WINNER was formed by YG Entertainment through the reality survival program \"WIN: Who is Next\" in 2013.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:53 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is asking about the formation of the South Korean boy group that released the album 2014 S/S.\n",
- "Action 5: finish(\"YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\")\n",
- "Observation 5: YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:53 - [react_agent.py:345:call] - answer: YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='The user is asking about the debut album of a South Korean boy group and who formed it.', action='llm_tool(\"2014 S/S is the debut album of a South Korean boy group\")', fun_name='llm_tool', fun_args=['2014 S/S is the debut album of a South Korean boy group'], fun_kwargs={}, observation='What is the name of the South Korean boy group that released the album 2014 S/S?'), StepOutput(step=2, thought='The user is inquiring about the formation of the South Korean boy group that released the album 2014 S/S.', action='llm_tool(\"Who formed the South Korean boy group that released the album 2014 S/S?\")', fun_name='llm_tool', fun_args=['Who formed the South Korean boy group that released the album 2014 S/S?'], fun_kwargs={}, observation='The South Korean boy group that released the album 2014 S/S is WINNER.'), StepOutput(step=3, thought='The user is seeking confirmation of the information provided.', action='llm_tool(\"Is WINNER the South Korean boy group that released the album 2014 S/S?\")', fun_name='llm_tool', fun_args=['Is WINNER the South Korean boy group that released the album 2014 S/S?'], fun_kwargs={}, observation='Yes, WINNER is the South Korean boy group that released the album 2014 S/S.'), StepOutput(step=4, thought='The user is looking for the name of the person or group that formed the South Korean boy group WINNER.', action='llm_tool(\"Who formed the South Korean boy group WINNER?\")', fun_name='llm_tool', fun_args=['Who formed the South Korean boy group WINNER?'], fun_kwargs={}, observation='The South Korean boy group WINNER was formed by YG Entertainment through the reality survival program \"WIN: Who is Next\" in 2013.'), StepOutput(step=5, thought='The user is asking about the formation of the South Korean boy group that released the album 2014 S/S.', action='finish(\"YG Entertainment through the reality survival program \\'WIN: Who is Next\\' in 2013.\")', fun_name='finish', fun_args=[\"YG Entertainment 
through the reality survival program 'WIN: Who is Next' in 2013.\"], fun_kwargs={}, observation=\"YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\")]\n",
- "\u001b[33m2024-06-15 23:17:53 - [3230041225.py:26:] - No. 6, question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?, ground truth: yg entertainment, pred answer: yg entertainment through reality survival program win who is next in 2013\u001b[0m\n",
- "\u001b[36m2024-06-15 23:17:53 - [react_agent.py:330:call] - input_query: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:54 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.\n",
- "Action 1: llm_tool(\"Aladin stage name consultant organizations performance\")\n",
- "Observation 1: Hello! How can I assist you with the information you provided?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:56 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the identity of the person known as Aladin who worked as a consultant to enhance organizational performance.\n",
- "Action 2: llm_tool(\"Aladin consultant organizational performance\")\n",
- "Observation 2: Hello! How can I assist you with Aladin consultant organizational performance?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:57 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.\n",
- "Action 3: llm_tool(\"Aladin consultant organizational performance\")\n",
- "Observation 3: Hello! How can I assist you with Aladin consultant organizational performance?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:17:59 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is asking about the person known by the stage name Aladin who helped organizations improve their performance as a consultant.\n",
- "Action 4: llm_tool(\"Aladin consultant organizations performance\")\n",
- "Observation 4: Hello! How can I assist you with the performance of Aladin consultant organizations?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:00 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is asking for information about a person known as Aladin who worked as a consultant to enhance organizational performance.\n",
- "Action 5: llm_tool(\"Aladin consultant organizations performance\")\n",
- "Observation 5: Hello! How can I assist you with the Aladin consultant organizations performance?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:01 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.\n",
- "Action 6: llm_tool(\"Aladin consultant organizations performance\")\n",
- "Observation 6: Hello! How can I assist you with the Aladin consultant organizations performance?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:03 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: The user is looking for information about the person known as Aladin, who assisted organizations in improving their performance as a consultant.\n",
- "Action 7: llm_tool(\"Aladin consultant organizations performance\")\n",
- "Observation 7: Hello! How can I assist you with the Aladin consultant organizations performance data?\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:03 - [react_agent.py:345:call] - answer: Hello! How can I assist you with the Aladin consultant organizations performance data?\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='The user is asking about a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.', action='llm_tool(\"Aladin stage name consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the information you provided?'), StepOutput(step=2, thought='The user is inquiring about the identity of the person known as Aladin who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizational performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizational performance'], fun_kwargs={}, observation='Hello! How can I assist you with Aladin consultant organizational performance?'), StepOutput(step=3, thought='The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizational performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizational performance'], fun_kwargs={}, observation='Hello! How can I assist you with Aladin consultant organizational performance?'), StepOutput(step=4, thought='The user is asking about the person known by the stage name Aladin who helped organizations improve their performance as a consultant.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! 
How can I assist you with the performance of Aladin consultant organizations?'), StepOutput(step=5, thought='The user is asking for information about a person known as Aladin who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the Aladin consultant organizations performance?'), StepOutput(step=6, thought='The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the Aladin consultant organizations performance?'), StepOutput(step=7, thought='The user is looking for information about the person known as Aladin, who assisted organizations in improving their performance as a consultant.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the Aladin consultant organizations performance data?')]\n",
- "\u001b[33m2024-06-15 23:18:03 - [3230041225.py:26:] - No. 7, question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?, ground truth: eenasul fateh, pred answer: hello how can i assist you with aladin consultant organizations performance data\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:03 - [react_agent.py:330:call] - input_query: The arena where the Lewiston Maineiacs played their home games can seat how many people?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:05 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about the seating capacity of the arena where the Lewiston Maineiacs played their home games.\n",
- "Action 1: llm_tool(\"Seating capacity of the arena where the Lewiston Maineiacs played their home games.\")\n",
- "Observation 1: The seating capacity of the arena where the Lewiston Maineiacs played their home games was 3,677.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:06 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the seating capacity of the arena where the Lewiston Maineiacs played their home games.\n",
- "Action 2: finish(\"3,677.\")\n",
- "Observation 2: 3,677.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:06 - [react_agent.py:345:call] - answer: 3,677.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='The user is asking about the seating capacity of the arena where the Lewiston Maineiacs played their home games.', action='llm_tool(\"Seating capacity of the arena where the Lewiston Maineiacs played their home games.\")', fun_name='llm_tool', fun_args=['Seating capacity of the arena where the Lewiston Maineiacs played their home games.'], fun_kwargs={}, observation='The seating capacity of the arena where the Lewiston Maineiacs played their home games was 3,677.'), StepOutput(step=2, thought='The user is inquiring about the seating capacity of the arena where the Lewiston Maineiacs played their home games.', action='finish(\"3,677.\")', fun_name='finish', fun_args=['3,677.'], fun_kwargs={}, observation='3,677.')]\n",
- "\u001b[33m2024-06-15 23:18:06 - [3230041225.py:26:] - No. 8, question: The arena where the Lewiston Maineiacs played their home games can seat how many people?, ground truth: 3677 seated, pred answer: 3677\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:06 - [react_agent.py:330:call] - input_query: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:07 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I will use the llm_tool to get the information about the ages of Annie Morton and Terry Richardson.\n",
- "Action 1: llm_tool(\"Annie Morton age\")\n",
- "Observation 1: Hello! How old is Annie Morton?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:09 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 2: llm_tool(\"Annie Morton age\")\n",
- "Observation 2: Hello! How old is Annie Morton?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:10 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I will use the llm_tool to get the information about the ages of Terry Richardson.\n",
- "Action 3: llm_tool(\"Terry Richardson age\")\n",
- "Observation 3: How old is Terry Richardson?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:12 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 4: llm_tool(\"Annie Morton age\")\n",
- "Observation 4: Hello! How old is Annie Morton?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:13 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 5: llm_tool(\"Annie Morton age\")\n",
- "Observation 5: Hello! How old is Annie Morton?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:14 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 6: llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")\n",
- "Observation 6: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:16 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n",
- "Action 7: llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")\n",
- "Observation 7: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:16 - [react_agent.py:345:call] - answer: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='I will use the llm_tool to get the information about the ages of Annie Morton and Terry Richardson.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! How old is Annie Morton?'), StepOutput(step=2, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! How old is Annie Morton?'), StepOutput(step=3, thought='I will use the llm_tool to get the information about the ages of Terry Richardson.', action='llm_tool(\"Terry Richardson age\")', fun_name='llm_tool', fun_args=['Terry Richardson age'], fun_kwargs={}, observation='How old is Terry Richardson?'), StepOutput(step=4, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! How old is Annie Morton?'), StepOutput(step=5, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! 
How old is Annie Morton?'), StepOutput(step=6, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")', fun_name='llm_tool', fun_args=['Who is older, Annie Morton or Terry Richardson?'], fun_kwargs={}, observation='Who is older, Annie Morton or Terry Richardson?'), StepOutput(step=7, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")', fun_name='llm_tool', fun_args=['Who is older, Annie Morton or Terry Richardson?'], fun_kwargs={}, observation='Who is older, Annie Morton or Terry Richardson?')]\n",
- "\u001b[33m2024-06-15 23:18:16 - [3230041225.py:26:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: terry richardson, pred answer: who is older annie morton or terry richardson\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:16 - [react_agent.py:330:call] - input_query: Are Local H and For Against both from the United States?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:17 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about the origin of two entities, Local H and For Against.\n",
- "Action 1: llm_tool(\"Local H origin\")\n",
- "Observation 1: Hello! How can I assist you today?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:18 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the origin of For Against.\n",
- "Action 2: llm_tool(\"For Against origin\")\n",
- "Observation 2: Hello! How can I assist you today regarding the topic of origin?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:20 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user wants to know if both Local H and For Against are from the United States.\n",
- "Action 3: llm_tool(\"Local H and For Against origin\")\n",
- "Observation 3: Hello! How can I assist you today with the information about Local H and their stance on For Against?\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:23 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: Confirming the origin of Local H and For Against.\n",
- "Action 4: llm_tool(\"Are Local H and For Against both from the United States?\")\n",
- "Observation 4: Yes, both Local H and For Against are from the United States.\u001b[0m\n",
- "\u001b[34m2024-06-15 23:18:24 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is seeking confirmation about the origin of Local H and For Against.\n",
- "Action 5: finish(\"Yes, both Local H and For Against are from the United States.\")\n",
- "Observation 5: Yes, both Local H and For Against are from the United States.\u001b[0m\n",
- "\u001b[36m2024-06-15 23:18:24 - [react_agent.py:345:call] - answer: Yes, both Local H and For Against are from the United States.\u001b[0m\n",
- "step_history: [StepOutput(step=1, thought='The user is asking about the origin of two entities, Local H and For Against.', action='llm_tool(\"Local H origin\")', fun_name='llm_tool', fun_args=['Local H origin'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=2, thought='The user is inquiring about the origin of For Against.', action='llm_tool(\"For Against origin\")', fun_name='llm_tool', fun_args=['For Against origin'], fun_kwargs={}, observation='Hello! How can I assist you today regarding the topic of origin?'), StepOutput(step=3, thought='The user wants to know if both Local H and For Against are from the United States.', action='llm_tool(\"Local H and For Against origin\")', fun_name='llm_tool', fun_args=['Local H and For Against origin'], fun_kwargs={}, observation='Hello! How can I assist you today with the information about Local H and their stance on For Against?'), StepOutput(step=4, thought='Confirming the origin of Local H and For Against.', action='llm_tool(\"Are Local H and For Against both from the United States?\")', fun_name='llm_tool', fun_args=['Are Local H and For Against both from the United States?'], fun_kwargs={}, observation='Yes, both Local H and For Against are from the United States.'), StepOutput(step=5, thought='The user is seeking confirmation about the origin of Local H and For Against.', action='finish(\"Yes, both Local H and For Against are from the United States.\")', fun_name='finish', fun_args=['Yes, both Local H and For Against are from the United States.'], fun_kwargs={}, observation='Yes, both Local H and For Against are from the United States.')]\n",
- "\u001b[33m2024-06-15 23:18:24 - [3230041225.py:26:] - No. 10, question: Are Local H and For Against both from the United States?, ground truth: yes, pred answer: yes both local h and for against are from united states\u001b[0m\n",
- "EM = (0.0, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), FM = (0.4, [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0]), average time = 8.354214406013488\n"
- ]
- }
- ],
- "source": [
- "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
- "\n",
- "# set up evaluation type\n",
- "EM_evaluator = AnswerMatchAcc(type=\"exact_match\")\n",
- "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n",
- "\n",
- "agent = ReActAgent(\n",
- " max_steps=7, model_client=OpenAIClient(), model_kwargs=gpt_model_kwargs\n",
- ")\n",
- "\n",
- "num_questions = 10\n",
- "gt_answers = []\n",
- "pred_answers = []\n",
- "start_time = time.time()\n",
- "for i in range(num_questions):\n",
- " question = val_dataset[i][\"question\"]\n",
- " gt_answer = normalize_answer(\n",
- " val_dataset[i][\"answer\"]\n",
- " ) # normalize the ground truth answer\n",
- " gt_answers.append(gt_answer)\n",
- "\n",
- " # get the agent's response\n",
- " pred_answer = agent(question)\n",
- " pred_answer = normalize_answer(pred_answer)\n",
- " pred_answers.append(pred_answer)\n",
- "\n",
- " printc(\n",
- " f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n",
- " color=\"yellow\",\n",
- " )\n",
- "\n",
- "end_time = time.time()\n",
- "\n",
- "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
- "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
- "avg_time = (end_time - start_time) / num_questions\n",
- "\n",
- "print(f\"EM = {em}, FM = {fm}, average time = {avg_time}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Without the tools and examples, EM=0 and FM=0.4. We saw hallucinations and nonsense:\n",
- "\n",
- "2024-06-15 23:17:04 - [3230041225.py:26:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: ``yes``, pred answer: ``no scott derrickson and ed wood were not of same nationality scott derrickson is american while ed wood was also american``\n",
- "\n",
- "2024-06-15 23:18:16 - [3230041225.py:26:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth:`` terry richardson``, pred answer: ``who is older annie morton or terry richardson``\n",
- "\n",
- "Therefore, using ReAct agent outperforms the base LLM.\n",
- "Meanwhile, ``LightRAG ReAct agent`` shows that the performance on 10 questions(EM=0.3)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 7. Future Improvement"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 84,
- "metadata": {},
- "outputs": [],
- "source": [
- "# TODO:\n",
- "# 1. advanced, add history to react\n",
- "# 2. add training, few shot\n",
- "# 3. llm as judge\n",
- "# 4. add picture\n",
- "# 5. better json handling, we need to store the answer output"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "lightrag-project",
- "language": "python",
- "name": "light-rag-project"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/use_cases/agent/react_agent_hotpot_qa.ipynb b/use_cases/agent/react_agent_hotpot_qa.ipynb
new file mode 100644
index 00000000..0e1d4d6d
--- /dev/null
+++ b/use_cases/agent/react_agent_hotpot_qa.ipynb
@@ -0,0 +1,1272 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ReAct Agent Use Case"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1. Q&A Chatbot\n",
+ "In this tutorial, we will implement ``adalflow ReAct`` to build a Q&A chatbot on the [HotpotQA](https://arxiv.org/pdf/1809.09600) dataset. \n",
+ "\n",
+ "To learn more about ``adalflow ReAct``, please refer to our developer notes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# 2. HotpotQA Dataset\n",
+ "We are using [HotpotQA](https://arxiv.org/pdf/1809.09600). It is a Wikipedia-based multi-hop question and answer dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# load the dataset\n",
+ "from datasets import load_dataset\n",
+ "\n",
+ "dataset = load_dataset(path=\"hotpot_qa\", name=\"fullwiki\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "len of eval: 7405\n",
+ "example: {'id': '5a8b57f25542995d1e6f1371', 'question': 'Were Scott Derrickson and Ed Wood of the same nationality?', 'answer': 'yes', 'type': 'comparison', 'level': 'hard', 'supporting_facts': {'title': ['Scott Derrickson', 'Ed Wood'], 'sent_id': [0, 0]}, 'context': {'title': ['Adam Collis', 'Ed Wood (film)', 'Tyler Bates', 'Doctor Strange (2016 film)', 'Hellraiser: Inferno', 'Sinister (film)', 'Deliver Us from Evil (2014 film)', 'Woodson, Arkansas', 'Conrad Brooks', 'The Exorcism of Emily Rose'], 'sentences': [['Adam Collis is an American filmmaker and actor.', ' He attended the Duke University from 1986 to 1990 and the University of California, Los Angeles from 2007 to 2010.', ' He also studied cinema at the University of Southern California from 1991 to 1997.', ' Collis first work was the assistant director for the Scott Derrickson\\'s short \"Love in the Ruins\" (1995).', ' In 1998, he played \"Crankshaft\" in Eric Koyanagi\\'s \"Hundred Percent\".'], ['Ed Wood is a 1994 American biographical period comedy-drama film directed and produced by Tim Burton, and starring Johnny Depp as cult filmmaker Ed Wood.', \" The film concerns the period in Wood's life when he made his best-known films as well as his relationship with actor Bela Lugosi, played by Martin Landau.\", ' Sarah Jessica Parker, Patricia Arquette, Jeffrey Jones, Lisa Marie, and Bill Murray are among the supporting cast.'], ['Tyler Bates (born June 5, 1965) is an American musician, music producer, and composer for films, television, and video games.', ' Much of his work is in the action and horror film genres, with films like \"Dawn of the Dead, 300, Sucker Punch,\" and \"John Wick.\"', ' He has collaborated with directors like Zack Snyder, Rob Zombie, Neil Marshall, William Friedkin, Scott Derrickson, and James Gunn.', ' With Gunn, he has scored every one of the director\\'s films; including \"Guardians of the Galaxy\", which became one of the highest grossing domestic movies of 2014, and its 2017 
sequel.', ' In addition, he is also the lead guitarist of the American rock band Marilyn Manson, and produced its albums \"The Pale Emperor\" and \"Heaven Upside Down\".'], ['Doctor Strange is a 2016 American superhero film based on the Marvel Comics character of the same name, produced by Marvel Studios and distributed by Walt Disney Studios Motion Pictures.', ' It is the fourteenth film of the Marvel Cinematic Universe (MCU).', ' The film was directed by Scott Derrickson, who wrote it with Jon Spaihts and C. Robert Cargill, and stars Benedict Cumberbatch as Stephen Strange, along with Chiwetel Ejiofor, Rachel McAdams, Benedict Wong, Michael Stuhlbarg, Benjamin Bratt, Scott Adkins, Mads Mikkelsen, and Tilda Swinton.', ' In \"Doctor Strange\", surgeon Strange learns the mystic arts after a career-ending car accident.'], ['Hellraiser: Inferno (also known as Hellraiser V: Inferno) is a 2000 American horror film.', ' It is the fifth installment in the \"Hellraiser\" series and the first \"Hellraiser\" film to go straight-to-DVD.', ' It was directed by Scott Derrickson and released on October 3, 2000.', \" The film concerns a corrupt detective who discovers Lemarchand's box at a crime scene.\", \" The film's reviews were mixed.\"], ['Sinister is a 2012 supernatural horror film directed by Scott Derrickson and written by Derrickson and C. 
Robert Cargill.', ' It stars Ethan Hawke as fictional true-crime writer Ellison Oswalt who discovers a box of home movies in his attic that puts his family in danger.'], ['Deliver Us from Evil is a 2014 American supernatural horror film directed by Scott Derrickson and produced by Jerry Bruckheimer.', ' The film is officially based on a 2001 non-fiction book entitled \"Beware the Night\" by Ralph Sarchie and Lisa Collier Cool, and its marketing campaign highlighted that it was \"inspired by actual accounts\".', ' The film stars Eric Bana, Édgar Ramírez, Sean Harris, Olivia Munn, and Joel McHale in the main roles and was released on July 2, 2014.'], ['Woodson is a census-designated place (CDP) in Pulaski County, Arkansas, in the United States.', ' Its population was 403 at the 2010 census.', ' It is part of the Little Rock–North Little Rock–Conway Metropolitan Statistical Area.', ' Woodson and its accompanying Woodson Lake and Wood Hollow are the namesake for Ed Wood Sr., a prominent plantation owner, trader, and businessman at the turn of the 20th century.', ' Woodson is adjacent to the Wood Plantation, the largest of the plantations own by Ed Wood Sr.'], ['Conrad Brooks (born Conrad Biedrzycki on January 3, 1931 in Baltimore, Maryland) is an American actor.', ' He moved to Hollywood, California in 1948 to pursue a career in acting.', ' He got his start in movies appearing in Ed Wood films such as \"Plan 9 from Outer Space\", \"Glen or Glenda\", and \"Jail Bait.\"', ' He took a break from acting during the 1960s and 1970s but due to the ongoing interest in the films of Ed Wood, he reemerged in the 1980s and has become a prolific actor.', ' He also has since gone on to write, produce and direct several films.'], ['The Exorcism of Emily Rose is a 2005 American legal drama horror film directed by Scott Derrickson and starring Laura Linney and Tom Wilkinson.', ' The film is loosely based on the story of Anneliese Michel and follows a self-proclaimed agnostic who acts 
as defense counsel (Linney) representing a parish priest (Wilkinson), accused by the state of negligent homicide after he performed an exorcism.']]}}\n",
+ "attributes in each sample: ['id', 'question', 'answer', 'type', 'level', 'supporting_facts', 'context']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# check the data sample\n",
+ "test_sample = dataset[\"validation\"][0]\n",
+ "print(f\"len of eval: {len(dataset['validation'])}\")\n",
+ "print(f\"example: {test_sample}\")\n",
+ "print(f\"attributes in each sample: {list(test_sample.keys())}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "question: Were Scott Derrickson and Ed Wood of the same nationality?\n",
+ "answer: yes\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Each sample contains a question and a corresponding answer.\n",
+ "print(f\"question: {test_sample.get('question')}\")\n",
+ "print(f\"answer: {test_sample.get('answer')}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 3. Set up\n",
+ "Please make sure you have set the model client APIs before running the agent. Now import the necessary packages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import dotenv\n",
+ "from adalflow.components.model_client import OpenAIClient\n",
+ "from adalflow.components.agent.react import ReActAgent\n",
+ "from adalflow.core.func_tool import FunctionTool\n",
+ "\n",
+ "import time\n",
+ "\n",
+ "# load environment, please set the relative path to your .env file that includes the api key\n",
+ "dotenv.load_dotenv(dotenv_path=\"../../.env\", override=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 4. Create Agent\n",
+ "To create an agent, we need to define the basic components.\n",
+ "\n",
+ "## Tools\n",
+ "First, we need to specify what functions the agent will need to answer the question. Since we are answering Wikipedia-based questions, we will allow the agent to **search** the Wikipedia API. The [ReAct Paper](https://arxiv.org/pdf/2210.03629) also includes a **lookup** function that serves as Ctrl+F functionality in the browser.\n",
+ "\n",
+ "As ``adalflow ReAct`` has a built-in ``finish`` function, we don't need to define it ourselves."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "from bs4 import BeautifulSoup\n",
+ "import re\n",
+ "import string\n",
+ "\n",
+ "\n",
+ "# copy code from the paper\n",
+ "def clean_str(p):\n",
+ " return p.encode().decode(\"unicode-escape\").encode(\"latin1\").decode(\"utf-8\")\n",
+ "\n",
+ "\n",
+ "# normalization copied from the paper's code\n",
+ "def normalize_answer(s):\n",
+ " def remove_articles(text):\n",
+ " return re.sub(r\"\\b(a|an|the)\\b\", \" \", text)\n",
+ "\n",
+ " def white_space_fix(text):\n",
+ " return \" \".join(text.split())\n",
+ "\n",
+ " def remove_punc(text):\n",
+ " exclude = set(string.punctuation)\n",
+ " return \"\".join(ch for ch in text if ch not in exclude)\n",
+ "\n",
+ " def lower(text):\n",
+ " return text.lower()\n",
+ "\n",
+ " return white_space_fix(remove_articles(remove_punc(lower(s))))\n",
+ "\n",
+ "\n",
+ "def search(entity: str) -> str:\n",
+ " \"\"\"\n",
+ " searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.\n",
+ " \"\"\"\n",
+ " # Format the entity for URL encoding\n",
+ " entity_formatted = entity.replace(\" \", \"+\")\n",
+ " url = f\"https://en.wikipedia.org/w/index.php?search={entity_formatted}\"\n",
+ "\n",
+ " # Fetch the page\n",
+ " response = requests.get(url)\n",
+ " soup = BeautifulSoup(response.text, \"html.parser\")\n",
+ "\n",
+ " # Check if the exact page was found or suggest similar items\n",
+ " # when a div with class mw-search-result-heading is detected, it means the entity page is not found on wikipedia\n",
+ " result_divs = soup.find_all(\"div\", {\"class\": \"mw-search-result-heading\"})\n",
+ "\n",
+ " if (\n",
+ " result_divs\n",
+ " ): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n",
+ " # get Similar results\n",
+ " similar_titles = [div.a.get_text() for div in result_divs]\n",
+ " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n",
+ " else:\n",
+ " # the paper uses page to represent the content of the p and ul tags\n",
+ " # Extract content\n",
+ " page_list = [\n",
+ " p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")\n",
+ " ]\n",
+ " # TODO: Recursive search, if find any concept that needs more search then call search again\n",
+ " # if any(\"may refer to:\" in p for p in page_list):\n",
+ " # search(entity)\n",
+ "\n",
+ " # restructure & clean the page content following the paper's logic\n",
+ " page = \"\"\n",
+ " for p in page_list:\n",
+ " if len(p.split(\" \")) > 2:\n",
+ " page += clean_str(p)\n",
+ " if not p.endswith(\"\\n\"):\n",
+ " page += \"\\n\"\n",
+ " paragraphs = page.split(\"\\n\")\n",
+ " paragraphs = [p.strip() for p in paragraphs if p.strip()]\n",
+ "\n",
+ " sentences = []\n",
+ " for p in paragraphs:\n",
+ " sentences += p.split(\". \")\n",
+ " sentences = [s.strip() + \".\" for s in sentences if s.strip()]\n",
+ "\n",
+ " # return the first 5 sentences\n",
+ " if sentences:\n",
+ " return (\n",
+ " \" \".join(sentences[:5]) if len(sentences) >= 5 else \" \".join(sentences)\n",
+ " )\n",
+ " else:\n",
+ " return \"No content found on this page.\"\n",
+ "\n",
+ " # TODO: clean the paragraphs and return the searched content\n",
+ "\n",
+ "\n",
+ "def lookup(text: str, keyword: str) -> str:\n",
+ " \"\"\"\n",
+ " returns the sentences containing keyword in the current passage.\n",
+ " \"\"\"\n",
+ " sentences = text.split(\".\")\n",
+ " matching_sentences = [\n",
+ " sentence.strip() + \".\"\n",
+ " for sentence in sentences\n",
+ " if keyword.lower() in sentence.lower()\n",
+ " ]\n",
+ " if not matching_sentences:\n",
+ " return \"No sentences found with the keyword.\"\n",
+ " else:\n",
+ " return \" \".join(\n",
+ " matching_sentences\n",
+ " ) # Join all matching sentences into a single string"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# set up tools for the agent\n",
+ "tools = [FunctionTool(fn=search)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Examples\n",
+ "The next thing to add is examples. Few shot prompt engineering is a common practice to improve the model performance.\n",
+ "\n",
+ "Let's use the paper's examples. The paper has 6 examples altogether."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "examples = [\n",
+ " \"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n",
+ "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\n",
+ "Action 1: search(\"Colorado orogeny\")\n",
+ "Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\n",
+ "Thought 2: It does not mention the eastern sector. So I need to look up eastern sector.\n",
+ "Action 2: lookup(\"eastern sector\")\n",
+ "Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\n",
+ "Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\n",
+ "Action 3: search(\"High Plains\")\n",
+ "Observation 3: High Plains refers to one of two distinct land regions:\n",
+ "Thought 4: I need to instead search High Plains (United States).\n",
+ "Action 4: search(\"High Plains (United States)\")\n",
+ "Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\n",
+ "Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\n",
+ "Action 5: finish(\"1,800 to 7,000 ft\")\"\"\",\n",
+ " \"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n",
+ "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.\n",
+ "Action 1: search(\"Milhouse\")\n",
+ "Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\n",
+ "Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\n",
+ "Action 2: lookup(\"named after\")\n",
+ "Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \n",
+ "Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\n",
+ "Action 3: finish(\"Richard Nixon\")\"\"\",\n",
+ " \"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n",
+ "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\n",
+ "Action 1: search(\"Adam Clayton Powell\")\n",
+ "Observation 1: Could not find [\"Adam Clayton Powell\"]. Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\n",
+ "Thought 2: To find the documentary, I can search Adam Clayton Powell (film).\n",
+ "Action 2: search(\"Adam Clayton Powell (film)\")\n",
+ "Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\n",
+ "The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\n",
+ "Thought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\n",
+ "Action 3: finish(\"The Saimaa Gesture\")\"\"\",\n",
+ " \"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n",
+ "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\n",
+ "Action 1: search(\"Nicholas Ray\")\n",
+ "Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\n",
+ "Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\n",
+ "Action 2: search(\"Elia Kazan\")\n",
+ "Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\n",
+ "Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\n",
+ "Action 3: finish(\"director, screenwriter, actor\")\"\"\",\n",
+ " \"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n",
+ "Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\n",
+ "Action 1: search(\"Arthur's Magazine\")\n",
+ "Observation 1: Arthur's Magazine (1844-1846) was an American literary periodical published in Philadelphia in the 19th century. \n",
+ "Thought 2: Arthur's Magazine was started in 1844. I need to search First for Women next.\n",
+ "Action 2: search(\"First for Women\")\n",
+ "Observation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \n",
+ "Thought 3: First for Women was started in 1989. 1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\n",
+ "Action 3: finish(\"Arthur's Magazine\")\"\"\",\n",
+ " \"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n",
+ "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\n",
+ "Action 1: search(\"Pavel Urysohn\")\n",
+ "Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 â August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\n",
+ "Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\n",
+ "Action 2: search(\"Leonid Levin\")\n",
+ "Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \n",
+ "Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \n",
+ "Action 3: finish(\"yes\")\"\"\",\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# preset up the examples as prompt_kwargs, the examples will be included in the system prompt\n",
+ "\n",
+ "# convert this example in FunctionExpression\n",
+ "# import adalflow as adal\n",
+ "# from adalflow.core.types import FunctionExpression\n",
+ "\n",
+ "# eg_1 = {\n",
+ "# \"Question\": \"What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\",\n",
+ "# \"Thought 1\": \"I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\",\n",
+ "# \"Action 1\": \"search\",\n",
+ "# \"kwargs\": {\"entity\": \"Colorado orogeny\"},\n",
+ "# \"Observation 1\": \"The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\",\n",
+ "# \"Thought 2\": \"It does not mention the eastern sector. So I need to look up eastern sector.\",\n",
+ "# \"Action 2\": \"lookup('eastern sector')\",\n",
+ "# \"Action 2\": \"lookup\",\n",
+ "# \"kwargs\": {\"text\": \"eastern sector\", \"keyword\": \"eastern sector\"},\n",
+ "# \"Observation 2\": \"(Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\",\n",
+ "# \"Thought 3\": \"The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\",\n",
+ "# \"Action 3\": \"search('High Plains')\",\n",
+ "# \"Observation 3\": \"High Plains refers to one of two distinct land regions:\",\n",
+ "# \"Thought 4\": \"I need to instead search High Plains (United States).\",\n",
+ "# \"Action 4\": \"search('High Plains (United States)')\",\n",
+ "# \"Observation 4\": \"The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\",\n",
+ "# \"Thought 5\": \"High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\",\n",
+ "# \"Action 5\": \"finish('1,800 to 7,000 ft')\"\n",
+ "# }\n",
+ "\n",
+ "# examples_expression = []\n",
+ "# for example in examples:\n",
+ "# ex_exp = FunctionExpression(thought=example)\n",
+ "\n",
+ "# preset_prompt_kwargs = {\"examples\": examples}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Model\n",
+ "\n",
+ "Next, we can choose the model to call. In this example we will use OpenAIClient ``gpt-3.5-turbo`` model. We will set the ``temperature`` at 0.0 to make the response as consistent as possible."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gpt_model_kwargs = {\n",
+ " \"model\": \"gpt-3.5-turbo\",\n",
+ " \"temperature\": 0.0,\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Agent\n",
+ "Combining the previous components, we can define the agent."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ReActAgent(\n",
+ " max_steps=3, add_llm_as_fallback=True, \n",
+ " (tool_manager): ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='search', func_desc='search(entity: str) -> str\\n\\n searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.\\n ', func_parameters={'type': 'object', 'properties': {'entity': {'type': 'str'}}, 'required': ['entity']})), FunctionTool(fn: .llm_tool at 0x1379487c0>, async: False, definition: FunctionDefinition(func_name='llm_tool', func_desc=\"llm_tool(input: str) -> str\\nI answer any input query with llm's world knowledge. Use me as a fallback tool or when the query is simple.\", func_parameters={'type': 'object', 'properties': {'input': {'type': 'str'}}, 'required': ['input']})), FunctionTool(fn: .finish at 0x137948400>, async: False, definition: FunctionDefinition(func_name='finish', func_desc='finish(answer: str) -> str\\nFinish the task with answer.', func_parameters={'type': 'object', 'properties': {'answer': {'type': 'str'}}, 'required': ['answer']}))], Additional Context: {})\n",
+ " (planner): Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.0}, trainable_prompt_kwargs=[]\n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {# role/task description #}\n",
+ " You are a helpful assistant.\n",
+ " Answer the user's query using the tools provided below with minimal steps and maximum accuracy.\n",
+ " {# REACT instructions #}\n",
+ " Each step you will read the previous Thought, Action, and Observation(execution result of the action) and then provide the next Thought and Action.\n",
+ " {# Tools #}\n",
+ " {% if tools %}\n",
+ " \n",
+ " You available tools are:\n",
+ " {% for tool in tools %}\n",
+ " {{ loop.index }}.\n",
+ " {{tool}}\n",
+ " ------------------------\n",
+ " {% endfor %}\n",
+ " \n",
+ " {% endif %}\n",
+ " {# output format and examples for output format #}\n",
+ " \n",
+ " {{output_format_str}}\n",
+ " \n",
+ " \n",
+ " {# Task specification to teach the agent how to think using 'divide and conquer' strategy #}\n",
+ " - For simple queries: Directly call the ``finish`` action and provide the answer.\n",
+ " - For complex queries:\n",
+ " - Step 1: Read the user query and potentially divide it into subqueries. And get started with the first subquery.\n",
+ " - Call one available tool at a time to solve each subquery/subquestion. \\\n",
+ " - At step 'finish', join all subqueries answers and finish the task.\n",
+ " Remember:\n",
+ " - Action must call one of the above tools with name. It can not be empty.\n",
+ " - You will always end with 'finish' action to finish the task. The answer can be the final answer or failure message.\n",
+ " \n",
+ " \n",
+ " -----------------\n",
+ " User query:\n",
+ " {{ input_str }}\n",
+ " {# Step History #}\n",
+ " {% if step_history %}\n",
+ " \n",
+ " Your previous steps:\n",
+ " {% for history in step_history %}\n",
+ " Step {{ loop.index }}.\n",
+ " \"Thought\": \"{{history.action.thought}}\",\n",
+ " \"Action\": \"{{history.action.action}}\",\n",
+ " \"Observation\": \"{{history.observation}}\"\n",
+ " ------------------------\n",
+ " {% endfor %}\n",
+ " \n",
+ " {% endif %}\n",
+ " You:, prompt_kwargs: {'tools': ['func_name: search\\nfunc_desc: \"search(entity: str) -> str\\\\n\\\\n searches the exact entity on Wikipedia\\\\\\n \\\\ and returns the first paragraph if it exists. If not, it will return some similar\\\\\\n \\\\ entities to search.\\\\n \"\\nfunc_parameters:\\n type: object\\n properties:\\n entity:\\n type: str\\n required:\\n - entity', \"func_name: llm_tool\\nfunc_desc: 'llm_tool(input: str) -> str\\n\\n I answer any input query with llm''s world knowledge. Use me as a fallback tool\\n or when the query is simple.'\\nfunc_parameters:\\n type: object\\n properties:\\n input:\\n type: str\\n required:\\n - input\", \"func_name: finish\\nfunc_desc: 'finish(answer: str) -> str\\n\\n Finish the task with answer.'\\nfunc_parameters:\\n type: object\\n properties:\\n answer:\\n type: str\\n required:\\n - answer\"], 'output_format_str': 'Your output should be formatted as a standard JSON instance with the following schema:\\n```\\n{\\n \"question\": \"The question to ask the LLM (Optional[str]) (optional)\",\\n \"thought\": \"Why the function is called (Optional[str]) (optional)\",\\n \"action\": \"FuncName() Valid function call expression. Example: \\\\\"FuncName(a=1, b=2)\\\\\" Follow the data type specified in the function parameters.e.g. for Type object with x,y properties, use \\\\\"ObjectType(x=1, y=2) (str) (required)\"\\n}\\n```\\nExamples:\\n```\\n{\\n \"question\": null,\\n \"thought\": \"I have finished the task.\",\\n \"action\": \"finish(answer=\\\\\"final answer: \\'answer\\'\\\\\")\"\\n}\\n________\\n```\\n-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!\\n-Use double quotes for the keys and string values.\\n-DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\\n-Follow the JSON formatting conventions.'}, prompt_variables: ['input_str', 'tools', 'step_history', 'output_format_str']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): JsonOutputParser(\n",
+ " data_class=FunctionExpression, examples=[FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"final answer: \\'answer\\'\")')], exclude_fields=None, include_fields=None, return_data_class=True\n",
+ " (output_format_prompt): Prompt(\n",
+ " template: Your output should be formatted as a standard JSON instance with the following schema:\n",
+ " ```\n",
+ " {{schema}}\n",
+ " ```\n",
+ " {% if example %}\n",
+ " Examples:\n",
+ " ```\n",
+ " {{example}}\n",
+ " ```\n",
+ " {% endif %}\n",
+ " -Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!\n",
+ " -Use double quotes for the keys and string values.\n",
+ " -DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\n",
+ " -Follow the JSON formatting conventions., prompt_variables: ['example', 'schema']\n",
+ " )\n",
+ " (output_processors): JsonParser()\n",
+ " )\n",
+ " )\n",
+ ")"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# max_steps refers to how many thought-action rounds we allow the model to perform\n",
+ "# to save resources, let's use 3 here\n",
+ "agent = ReActAgent(\n",
+ " tools=tools,\n",
+ " max_steps=3,\n",
+ " model_client=OpenAIClient(),\n",
+ " model_kwargs=gpt_model_kwargs,\n",
+ " # preset_prompt_kwargs=preset_prompt_kwargs,\n",
+ ")\n",
+ "agent"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import importlib\n",
+ "import adalflow\n",
+ "\n",
+ "importlib.reload(adalflow)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 5. Q & A\n",
+ "Next we can use the agent to answer our questions. Let's run 5 examples. We will use the validation data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dataset({\n",
+ " features: ['id', 'question', 'answer', 'type', 'level', 'supporting_facts', 'context'],\n",
+ " num_rows: 7405\n",
+ "})"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val_dataset = dataset[\"validation\"]\n",
+ "val_dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "``adalflow`` provides a ``printc`` function. You can use it to show colored console output for the agent."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[31m2024-12-19 13:58:48 - [react.py:285:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:49 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for the nationalities of Scott Derrickson and Ed Wood.', action=\"search(entity='Scott Derrickson')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Scott Derrickson'}), observation='Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012), and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:50 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will search for the nationality of Ed Wood.', action=\"search(entity='Ed Wood')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Ed Wood'}), observation=\"Edward Davis Wood Jr. (October 10, 1924\\xa0– December 10, 1978) was an American filmmaker, actor, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:51 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"Scott Derrickson and Ed Wood were both of American nationality.\"'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Scott Derrickson and Ed Wood were both of American nationality.'}), observation='Scott Derrickson and Ed Wood were both of American nationality.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 13:58:51 - [react.py:299:call] - answer:\n",
+ " Scott Derrickson and Ed Wood were both of American nationality.\u001b[0m\n",
+ "\u001b[33m2024-12-19 13:58:51 - [530968165.py:14:] - question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: scott derrickson and ed wood were both of american nationality\u001b[0m\n",
+ "\u001b[31m2024-12-19 13:58:51 - [react.py:285:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:52 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for the government position held by the woman who portrayed Corliss Archer in the film Kiss and Tell.', action=\"search(entity='Shirley Temple')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Shirley Temple'}), observation=\"This is an accepted version of this page. Shirley Temple Black (born Shirley Jane Temple; April 23, 1928 – February 10, 2014) was an American actress, singer, dancer, and diplomat, who was Hollywood's number-one box-office draw as a child actress from 1934 to 1938. Later, she was named United States Ambassador to Ghana and Czechoslovakia, and also served as Chief of Protocol of the United States.. Temple began her film career in 1931 when she was three years old and was well-known for her performance in Bright Eyes, which was released in 1934. She won a special Juvenile Academy Award in February 1935 for her outstanding contribution as a juvenile performer in motion pictures during 1934 and continued to appear in popular films through the remainder of the 1930s, although her subsequent films became less popular as she grew older.[1] She appeared in her last film, A Kiss for Corliss, in 1949.[2][3].\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:53 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I have finished the task.', action=\"finish(answer='The government positions held by Shirley Temple, who portrayed Corliss Archer in the film Kiss and Tell, include being the United States Ambassador to Ghana and Czechoslovakia, as well as serving as Chief of Protocol of the United States.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The government positions held by Shirley Temple, who portrayed Corliss Archer in the film Kiss and Tell, include being the United States Ambassador to Ghana and Czechoslovakia, as well as serving as Chief of Protocol of the United States.'}), observation='The government positions held by Shirley Temple, who portrayed Corliss Archer in the film Kiss and Tell, include being the United States Ambassador to Ghana and Czechoslovakia, as well as serving as Chief of Protocol of the United States.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 13:58:53 - [react.py:299:call] - answer:\n",
+ " The government positions held by Shirley Temple, who portrayed Corliss Archer in the film Kiss and Tell, include being the United States Ambassador to Ghana and Czechoslovakia, as well as serving as Chief of Protocol of the United States.\u001b[0m\n",
+ "\u001b[33m2024-12-19 13:58:53 - [530968165.py:14:] - question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: government positions held by shirley temple who portrayed corliss archer in film kiss and tell include being united states ambassador to ghana and czechoslovakia as well as serving as chief of protocol of united states\u001b[0m\n",
+ "\u001b[31m2024-12-19 13:58:53 - [react.py:285:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:55 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question='What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?', thought='I will search for the science fantasy young adult series with companion books about enslaved worlds and alien species.', action=\"search(entity='science fantasy young adult series with companion books about enslaved worlds and alien species')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'science fantasy young adult series with companion books about enslaved worlds and alien species'}), observation=\"Could not find exact page for 'science fantasy young adult series with companion books about enslaved worlds and alien species'. Similar topics: ['Animorphs', 'Feminist science fiction', 'LGBT themes in speculative fiction', 'Apocalyptic and post-apocalyptic fiction', 'Last Legionary']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:56 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will use LLM as a fallback tool to provide information on the science fantasy young adult series with companion books about enslaved worlds and alien species.', action=\"llm_tool(input='What is a science fantasy young adult series with companion books about enslaved worlds and alien species?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'What is a science fantasy young adult series with companion books about enslaved worlds and alien species?'}), observation='One popular science fantasy young adult series that fits your description is the \"Lorien Legacies\" series by Pittacus Lore. The series includes companion books that delve into the enslaved worlds and alien species featured in the main novels. The series follows a group of alien teenagers known as the Garde who are on Earth hiding from their enemies, the Mogadorians, while developing their powers to fight back.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:57 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"One popular science fantasy young adult series that fits your description is the \\'Lorien Legacies\\' series by Pittacus Lore. The series includes companion books that delve into the enslaved worlds and alien species featured in the main novels. The series follows a group of alien teenagers known as the Garde who are on Earth hiding from their enemies, the Mogadorians, while developing their powers to fight back.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"One popular science fantasy young adult series that fits your description is the 'Lorien Legacies' series by Pittacus Lore. The series includes companion books that delve into the enslaved worlds and alien species featured in the main novels. The series follows a group of alien teenagers known as the Garde who are on Earth hiding from their enemies, the Mogadorians, while developing their powers to fight back.\"}), observation=\"One popular science fantasy young adult series that fits your description is the 'Lorien Legacies' series by Pittacus Lore. The series includes companion books that delve into the enslaved worlds and alien species featured in the main novels. The series follows a group of alien teenagers known as the Garde who are on Earth hiding from their enemies, the Mogadorians, while developing their powers to fight back.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 13:58:57 - [react.py:299:call] - answer:\n",
+ " One popular science fantasy young adult series that fits your description is the 'Lorien Legacies' series by Pittacus Lore. The series includes companion books that delve into the enslaved worlds and alien species featured in the main novels. The series follows a group of alien teenagers known as the Garde who are on Earth hiding from their enemies, the Mogadorians, while developing their powers to fight back.\u001b[0m\n",
+ "\u001b[33m2024-12-19 13:58:57 - [530968165.py:14:] - question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: one popular science fantasy young adult series that fits your description is lorien legacies series by pittacus lore series includes companion books that delve into enslaved worlds and alien species featured in main novels series follows group of alien teenagers known as garde who are on earth hiding from their enemies mogadorians while developing their powers to fight back\u001b[0m\n",
+ "\u001b[31m2024-12-19 13:58:57 - [react.py:285:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:58:59 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for information about the locations of Laleli Mosque and Esma Sultan Mansion.', action=\"search(entity='Laleli Mosque')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Laleli Mosque'}), observation=\"The Laleli Mosque (Turkish: Laleli Camii, lit.\\u2009'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. 
The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:00 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I have found information about the Laleli Mosque. Now, I will search for information about the Esma Sultan Mansion.', action=\"search(entity='Esma Sultan Mansion')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Esma Sultan Mansion'}), observation='The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:01 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have found information about both the Laleli Mosque and the Esma Sultan Mansion.', action='finish(answer=\"Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.'}), observation='Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 13:59:01 - [react.py:299:call] - answer:\n",
+ " Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.\u001b[0m\n",
+ "\u001b[33m2024-12-19 13:59:01 - [530968165.py:14:] - question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: yes laleli mosque and esma sultan mansion are not located in same neighborhood laleli mosque is in laleli fatih istanbul while esma sultan mansion is in ortaköy neighborhood of istanbul\u001b[0m\n",
+ "\u001b[31m2024-12-19 13:59:01 - [react.py:285:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:03 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought=\"I will search for the director of the romantic comedy 'Big Stone Gap' on Wikipedia.\", action=\"search(entity='Big Stone Gap (film)')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Big Stone Gap (film)'}), observation=\"Big Stone Gap is a 2014 American drama romantic comedy film written and directed by Adriana Trigiani and produced by Donna Gigliotti for Altar Identity Studios, a subsidiary of Media Society. Based on Trigiani's 2000 best-selling novel of the same name, the story is set in the actual Virginia town of Big Stone Gap circa 1970s. The film had its world premiere at the Virginia Film Festival on November 6, 2014.[2]. The film was released on October 9, 2015, by Picturehouse.[3] The film was released in Blu-Ray by Universal Pictures Home Entertainment on February 2, 2016.. In 1978, 40-year-old independent woman Ave Maria Mulligan owns her dead father's pharmacy in her hometown of Big Stone Gap, Virginia.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:04 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought=\"I will provide the director of the romantic comedy 'Big Stone Gap'.\", action='finish(answer=\"The director of the romantic comedy \\'Big Stone Gap\\' is Adriana Trigiani.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"The director of the romantic comedy 'Big Stone Gap' is Adriana Trigiani.\"}), observation=\"The director of the romantic comedy 'Big Stone Gap' is Adriana Trigiani.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 13:59:04 - [react.py:299:call] - answer:\n",
+ " The director of the romantic comedy 'Big Stone Gap' is Adriana Trigiani.\u001b[0m\n",
+ "\u001b[33m2024-12-19 13:59:04 - [530968165.py:14:] - question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: director of romantic comedy big stone gap is adriana trigiani\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "from adalflow.utils.logger import printc\n",
+ "\n",
+ "num_questions = 5\n",
+ "for i in range(num_questions):\n",
+ " question = val_dataset[i][\"question\"]\n",
+ " gt_answer = normalize_answer(\n",
+ " val_dataset[i][\"answer\"]\n",
+ " ) # normalize the ground truth answer\n",
+ "\n",
+ " # get the agent's response\n",
+ " pred_answer = agent(question)\n",
+ " pred_answer = normalize_answer(pred_answer)\n",
+ "\n",
+ " printc(\n",
+ " f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n",
+ " color=\"yellow\",\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 6. Evaluation\n",
+ "\n",
+ "Now you will see that we have the ``exact correct answer`` for some questions:\n",
+ "\n",
+ "question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: ``yes``, pred answer: ``yes``\n",
+ "\n",
+ "question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: ``animorphs``, pred answer: ``animorphs``\n",
+ "\n",
+ "Sometimes the agent answers correctly but not in the same format as the ground truth, e.g. ground truth: ``no``, pred answer: ``no, they are not the same``. This is something we can tolerate.\n",
+ "\n",
+ "But how do we evaluate whether the agent is doing well, or whether our tools, examples, and prompt implementations work well? We need evaluation metrics.\n",
+ "\n",
+ "1. Exact Match (EM)\n",
+ "Exact Match is the metric the paper uses. A response is counted as correct only when the normalized agent response is identical to the normalized ground truth answer. The paper's EM for the ReAct agent is around 30% (GPT-3).\n",
+ "\n",
+ "2. Fuzzy Match (FM)\n",
+ "EM alone doesn't make much sense for question answering, given LLMs' tendency to produce lengthy outputs. So we propose fuzzy match: if the ground truth answer is contained in the agent response, we count it as correct. FM is not always reliable, though.\n",
+ "E.g. question: Harry Potter and Dumbledore, who is older? ground truth: ``dumbledore``, pred answer: ``harry potter is older than dumbledore.``\n",
+ "Here the model mentions the ground truth but still gives the wrong answer, so FM should only serve as a reference.\n",
+ "\n",
+ "Let's use the ``AdalFlow`` ``eval`` module to evaluate on 10 questions, following the paper's practice of setting ``max_step`` to `7`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[31m2024-12-19 13:59:56 - [react.py:285:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:57 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for the nationalities of Scott Derrickson and Ed Wood.', action=\"search(entity='Scott Derrickson')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Scott Derrickson'}), observation='Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012), and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:58 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will search for the nationality of Ed Wood.', action=\"search(entity='Ed Wood')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Ed Wood'}), observation=\"Edward Davis Wood Jr. (October 10, 1924\\xa0– December 10, 1978) was an American filmmaker, actor, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 13:59:59 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have finished the search for the nationalities of Scott Derrickson and Ed Wood.', action='finish(answer=\"Scott Derrickson is American and Ed Wood was also American.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Scott Derrickson is American and Ed Wood was also American.'}), observation='Scott Derrickson is American and Ed Wood was also American.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 13:59:59 - [react.py:299:call] - answer:\n",
+ " Scott Derrickson is American and Ed Wood was also American.\u001b[0m\n",
+ "\u001b[33m2024-12-19 13:59:59 - [2823432734.py:31:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: scott derrickson is american and ed wood was also american\u001b[0m\n",
+ "\u001b[31m2024-12-19 13:59:59 - [react.py:285:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:00 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for the government position held by the woman who portrayed Corliss Archer in the film Kiss and Tell.', action=\"search(entity='Corliss Archer')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Corliss Archer'}), observation='Fictional teenage girl Corliss Archer is the lead character in a series of American short stories written by F. Hugh Herbert starting in 1943. She also appears in these derivative works:. Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us. HelpLearn to editCommunity portalRecent changesUpload file.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:02 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action=\"search(entity='Kiss and Tell film cast')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Kiss and Tell film cast'}), observation=\"Could not find exact page for 'Kiss and Tell film cast'. Similar topics: ['Kiss & Tell (1997 film)', 'Kiss Me (2014 film)', 'Kiss Kiss Bang Bang', 'Kiss and Tell (1945 film)', 'Kiss the Girls (1997 film)']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:04 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I will search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action=\"search(entity='Kiss and Tell (1945 film) cast')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Kiss and Tell (1945 film) cast'}), observation=\"Could not find exact page for 'Kiss and Tell (1945 film) cast'. Similar topics: ['Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Kiss Them for Me (film)', 'A Kiss for Corliss', 'State Fair (1945 film)']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:05 - [react.py:265:_run_one_step] - Step 4: \n",
+ "StepOutput(step=4, action=FunctionExpression(question=None, thought='I have finished the search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='finish(answer=\"Unfortunately, I couldn\\'t find the specific information about the woman who portrayed Corliss Archer in the film Kiss and Tell.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"Unfortunately, I couldn't find the specific information about the woman who portrayed Corliss Archer in the film Kiss and Tell.\"}), observation=\"Unfortunately, I couldn't find the specific information about the woman who portrayed Corliss Archer in the film Kiss and Tell.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:05 - [react.py:299:call] - answer:\n",
+ " Unfortunately, I couldn't find the specific information about the woman who portrayed Corliss Archer in the film Kiss and Tell.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:05 - [2823432734.py:31:] - No. 2, question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: unfortunately i couldnt find specific information about woman who portrayed corliss archer in film kiss and tell\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:05 - [react.py:285:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:07 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for the science fantasy young adult series described by the user.', action=\"search(entity='science fantasy young adult series')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'science fantasy young adult series'}), observation=\"Could not find exact page for 'science fantasy young adult series'. Similar topics: ['Ballantine Adult Fantasy series', 'Red Queen (novel)', 'Brandon Sanderson', 'The Mortal Instruments', 'V. E. Schwab']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:09 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will search for companion books narrating the stories of enslaved worlds and alien species related to the previous search.', action=\"search(entity='companion books enslaved worlds alien species')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'companion books enslaved worlds alien species'}), observation=\"Could not find exact page for 'companion books enslaved worlds alien species'. Similar topics: ['List of Doctor Who universe creatures and aliens', 'List of Star Wars species (P–T)', 'List of companions in Doctor Who spin-offs', 'Extraterrestrial life', 'List of Known Space characters']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:11 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I will search for companion books narrating the stories of enslaved worlds and alien species related to the previous search.', action=\"search(entity='companion books narrating stories enslaved worlds alien species')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'companion books narrating stories enslaved worlds alien species'}), observation=\"Could not find exact page for 'companion books narrating stories enslaved worlds alien species'. Similar topics: ['Animorphs', 'Eighth Doctor', 'Apocalyptic and post-apocalyptic fiction', 'Barsoom', 'Wicked Witch of the West']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:13 - [react.py:265:_run_one_step] - Step 4: \n",
+ "StepOutput(step=4, action=FunctionExpression(question=None, thought='I have exhausted the search for the specific series and companion books. I will use the LLM tool to provide a relevant answer.', action=\"llm_tool(input='What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?'}), observation='The science fantasy young adult series you are referring to is \"The Lorien Legacies\" series by Pittacus Lore. The companion books you mentioned are called \"The Lost Files,\" which narrate the stories of enslaved worlds and alien species in the same universe as the main series.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:14 - [react.py:265:_run_one_step] - Step 5: \n",
+ "StepOutput(step=5, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"The science fantasy young adult series you are referring to is \\'The Lorien Legacies\\' series by Pittacus Lore. The companion books you mentioned are called \\'The Lost Files,\\' which narrate the stories of enslaved worlds and alien species in the same universe as the main series.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"The science fantasy young adult series you are referring to is 'The Lorien Legacies' series by Pittacus Lore. The companion books you mentioned are called 'The Lost Files,' which narrate the stories of enslaved worlds and alien species in the same universe as the main series.\"}), observation=\"The science fantasy young adult series you are referring to is 'The Lorien Legacies' series by Pittacus Lore. The companion books you mentioned are called 'The Lost Files,' which narrate the stories of enslaved worlds and alien species in the same universe as the main series.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:14 - [react.py:299:call] - answer:\n",
+ " The science fantasy young adult series you are referring to is 'The Lorien Legacies' series by Pittacus Lore. The companion books you mentioned are called 'The Lost Files,' which narrate the stories of enslaved worlds and alien species in the same universe as the main series.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:14 - [2823432734.py:31:] - No. 3, question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: science fantasy young adult series you are referring to is lorien legacies series by pittacus lore companion books you mentioned are called lost files which narrate stories of enslaved worlds and alien species in same universe as main series\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:14 - [react.py:285:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:16 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for information about the locations of Laleli Mosque and Esma Sultan Mansion.', action=\"search(entity='Laleli Mosque')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Laleli Mosque'}), observation=\"The Laleli Mosque (Turkish: Laleli Camii, lit.\\u2009'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. 
The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:17 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will search for information about the location of Esma Sultan Mansion.', action=\"search(entity='Esma Sultan Mansion')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Esma Sultan Mansion'}), observation='The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:18 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.'}), observation='Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:18 - [react.py:299:call] - answer:\n",
+ " Yes, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. Laleli Mosque is in Laleli, Fatih, Istanbul, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:18 - [2823432734.py:31:] - No. 4, question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: yes laleli mosque and esma sultan mansion are not located in same neighborhood laleli mosque is in laleli fatih istanbul while esma sultan mansion is in ortaköy neighborhood of istanbul\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:18 - [react.py:285:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:19 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought=\"I will search for the director of the romantic comedy 'Big Stone Gap' on Wikipedia.\", action=\"search(entity='Big Stone Gap (film)')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Big Stone Gap (film)'}), observation=\"Big Stone Gap is a 2014 American drama romantic comedy film written and directed by Adriana Trigiani and produced by Donna Gigliotti for Altar Identity Studios, a subsidiary of Media Society. Based on Trigiani's 2000 best-selling novel of the same name, the story is set in the actual Virginia town of Big Stone Gap circa 1970s. The film had its world premiere at the Virginia Film Festival on November 6, 2014.[2]. The film was released on October 9, 2015, by Picturehouse.[3] The film was released in Blu-Ray by Universal Pictures Home Entertainment on February 2, 2016.. In 1978, 40-year-old independent woman Ave Maria Mulligan owns her dead father's pharmacy in her hometown of Big Stone Gap, Virginia.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:20 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought=\"I will search for the director of the romantic comedy 'Big Stone Gap' on Wikipedia.\", action=\"search(entity='Adriana Trigiani')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Adriana Trigiani'}), observation=\"Adriana Trigiani is an American best-selling author of eighteen books, playwright, television writer/producer, film director/screenwriter/producer, and entrepreneur based in New York City. Trigiani has published a novel a year since 2000.[1]. Trigiani graduated from Saint Mary's College in Indiana in 1981.[2]. Inspired by her Italian American heritage and Appalachian childhood in Big Stone Gap, Virginia, Trigiani arrived in New York in 1985. Trigiani made her off-Broadway debut in New York City as a playwright in 1985 at the Manhattan Theater Club with Secrets of the Lava Lamp, directed by Stuart Ross.[3] From 1988 to 1998, she created scripts for television sitcoms, including The Cosby Show (1984) and its spin-off A Different World (1987).\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:21 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have finished the task.', action=\"finish(answer='Adriana Trigiani is the director of the romantic comedy film Big Stone Gap and is based in New York City.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Adriana Trigiani is the director of the romantic comedy film Big Stone Gap and is based in New York City.'}), observation='Adriana Trigiani is the director of the romantic comedy film Big Stone Gap and is based in New York City.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:21 - [react.py:299:call] - answer:\n",
+ " Adriana Trigiani is the director of the romantic comedy film Big Stone Gap and is based in New York City.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:21 - [2823432734.py:31:] - No. 5, question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: adriana trigiani is director of romantic comedy film big stone gap and is based in new york city\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:21 - [react.py:285:call] - input_query: 2014 S/S is the debut album of a South Korean boy group that was formed by who?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:22 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for information about the debut album of the South Korean boy group.', action=\"search(entity='2014 S/S (album)')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': '2014 S/S (album)'}), observation='2014 S/S is the debut album of South Korean group Winner. It was released on August 12, 2014, by the group\\'s record label, YG Entertainment. The members were credited for writing the lyrics and composing the majority of the album\\'s songs.[2]. The members produced the majority of the music for this album themselves, with the help of other producers such as Choice 37, B.I., Airplay, and others.[3] The album was highlighted for incorporating elements generally absent from K-pop releases, including hints of acoustic and alternative rock.[4][5]. The title track \"Empty\" became a hit in South Korea, topping the Gaon Chart and Billboard\\'s K-Pop Hot 100 chart,[6][7] and topping 7 other South Korean charts.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:24 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question='The question to ask the LLM (Optional[str]) (optional)', thought='I will find out who formed the South Korean boy group Winner.', action=\"search(entity='Winner (group)')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Winner (group)'}), observation=\"Could not find exact page for 'Winner (group)'. Similar topics: ['2026 FIFA World Cup', '2025 FIFA Club World Cup', '2025 Africa Cup of Nations', 'Reality Winner', 'Dana Winner']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:26 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I will search for the formation of the South Korean boy group Winner through another related search.', action=\"search(entity='Winner (South Korean group) formation')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Winner (South Korean group) formation'}), observation=\"Could not find exact page for 'Winner (South Korean group) formation'. Similar topics: ['Blackpink', 'Winner (band)', 'Babymonster', 'BigBang (South Korean band)', 'Seventeen (South Korean band)']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:27 - [react.py:265:_run_one_step] - Step 4: \n",
+ "StepOutput(step=4, action=FunctionExpression(question=None, thought='I have exhausted the search options related to the formation of Winner. I will provide a general response.', action=\"llm_tool(input='Who formed the South Korean boy group Winner?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Who formed the South Korean boy group Winner?'}), observation='The South Korean boy group Winner was formed by YG Entertainment through the reality survival program called \"WIN: Who is Next\" in 2013.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:28 - [react.py:265:_run_one_step] - Step 5: \n",
+ "StepOutput(step=5, action=FunctionExpression(question=None, thought='I have exhausted the search options related to the formation of Winner. I will provide a general response.', action='finish(answer=\"YG Entertainment formed the South Korean boy group Winner through the reality survival program called \\'WIN: Who is Next\\' in 2013.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"YG Entertainment formed the South Korean boy group Winner through the reality survival program called 'WIN: Who is Next' in 2013.\"}), observation=\"YG Entertainment formed the South Korean boy group Winner through the reality survival program called 'WIN: Who is Next' in 2013.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:28 - [react.py:299:call] - answer:\n",
+ " YG Entertainment formed the South Korean boy group Winner through the reality survival program called 'WIN: Who is Next' in 2013.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:28 - [2823432734.py:31:] - No. 6, question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?, ground truth: yg entertainment, pred answer: yg entertainment formed south korean boy group winner through reality survival program called win who is next in 2013\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:28 - [react.py:285:call] - input_query: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:29 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='The user is asking about a person known by the stage name Aladin who helped organizations improve their performance as a consultant.', action=\"search(entity='Aladin')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Aladin'}), observation='Aladdin is a folk tale of Middle Eastern origin.. Aladdin, Aladin, and variants such as Aladdin and His Magic Lamp, may also refer to:. Main pageContentsCurrent eventsRandom articleAbout WikipediaContact us. HelpLearn to editCommunity portalRecent changesUpload file. Donate.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:31 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought=\"The search for Aladin did not provide relevant information. Let's try to find the person known by the stage name Aladin who helped organizations improve their performance as a consultant.\", action=\"search(entity='Aladin consultant')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Aladin consultant'}), observation=\"Could not find exact page for 'Aladin consultant'. Similar topics: ['2009 in film', 'Magicians (2007 film)', 'Liberal Party of Canada candidates in the 2019 Canadian federal election', 'Ashchorjyo Prodeep', 'Waleed El-Ansary']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:33 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought=\"The search for Aladin consultant did not provide relevant information. Let's try to find the person known by the stage name Aladin who helped organizations improve their performance as a consultant.\", action=\"search(entity='Aladin performance improvement consultant')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Aladin performance improvement consultant'}), observation=\"Could not find exact page for 'Aladin performance improvement consultant'. Similar topics: ['Five Pillars of Islam', 'Signal-to-noise ratio', 'Islamic Golden Age', 'Travelling salesman problem', 'Clock']\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:35 - [react.py:265:_run_one_step] - Step 4: \n",
+ "StepOutput(step=4, action=FunctionExpression(question=None, thought=\"The search for Aladin with different variations did not yield relevant information. Let's use LLM as a fallback tool to answer the query.\", action=\"llm_tool(input='Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?'}), observation='The person known by his stage name Aladin who helped organizations improve their performance as a consultant is Aladin Akyurek. He is a renowned consultant, speaker, and author who specializes in organizational development and performance improvement.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:36 - [react.py:265:_run_one_step] - Step 5: \n",
+ "StepOutput(step=5, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"The person known by his stage name Aladin who helped organizations improve their performance as a consultant is Aladin Akyurek. He is a renowned consultant, speaker, and author who specializes in organizational development and performance improvement.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The person known by his stage name Aladin who helped organizations improve their performance as a consultant is Aladin Akyurek. He is a renowned consultant, speaker, and author who specializes in organizational development and performance improvement.'}), observation='The person known by his stage name Aladin who helped organizations improve their performance as a consultant is Aladin Akyurek. He is a renowned consultant, speaker, and author who specializes in organizational development and performance improvement.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:36 - [react.py:299:call] - answer:\n",
+ " The person known by his stage name Aladin who helped organizations improve their performance as a consultant is Aladin Akyurek. He is a renowned consultant, speaker, and author who specializes in organizational development and performance improvement.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:36 - [2823432734.py:31:] - No. 7, question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?, ground truth: eenasul fateh, pred answer: person known by his stage name aladin who helped organizations improve their performance as consultant is aladin akyurek he is renowned consultant speaker and author who specializes in organizational development and performance improvement\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:36 - [react.py:285:call] - input_query: The arena where the Lewiston Maineiacs played their home games can seat how many people?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:37 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for the arena where the Lewiston Maineiacs played their home games.', action=\"search(entity='Androscoggin Bank Colisée')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Androscoggin Bank Colisée'}), observation='The Colisée, formerly Androscoggin Bank Colisée, Central Maine Youth Center, Central Maine Civic Center, and Lewiston Colisee, is a 4,000 capacity (3,677 seated) multi-purpose arena, in Lewiston, Maine, United States that was built from 1956 to 1958 and opened in the latter year to replace the Dominics Regional High School Arena and was initially constructed and operated by the Catholic parish of SS. Peter and Paul. It was used as a replacement venue for the boxing match between Muhammad Ali and Sonny Liston in 1965.. Currently, it is the home of the Maine Nordiques of the North American Hockey League. The Colisée is also used for concerts, conventions and trade shows.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:38 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I have finished the task.', action=\"finish(answer='The arena where the Lewiston Maineiacs played their home games, Androscoggin Bank Colisée, can seat 4,000 people (3,677 seated).' )\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The arena where the Lewiston Maineiacs played their home games, Androscoggin Bank Colisée, can seat 4,000 people (3,677 seated).'}), observation='The arena where the Lewiston Maineiacs played their home games, Androscoggin Bank Colisée, can seat 4,000 people (3,677 seated).')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:38 - [react.py:299:call] - answer:\n",
+ " The arena where the Lewiston Maineiacs played their home games, Androscoggin Bank Colisée, can seat 4,000 people (3,677 seated).\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:38 - [2823432734.py:31:] - No. 8, question: The arena where the Lewiston Maineiacs played their home games can seat how many people?, ground truth: 3677 seated, pred answer: arena where lewiston maineiacs played their home games androscoggin bank colisée can seat 4000 people 3677 seated\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:38 - [react.py:285:call] - input_query: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:39 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='I will search for information about Annie Morton and Terry Richardson to determine who is older.', action=\"search(entity='Annie Morton')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Annie Morton'}), observation='Annie Morton (born October 8, 1970) is an American model[1] born in Pennsylvania.[2] She has appeared on the covers of British Vogue, ID, Marie Claire, and other magazines. She has been photographed by Helmut Newton; Peter Lindbergh; Annie Leibovitz; Richard Avedon; Juergen Teller;[3] Paul Jasmin, Mary Ellen Mark, Stephen Shames, and Terry Richardson, and modeled for Donna Karan,[4] Givenchy, Guerlain, Chanel, Harper\\'s Bazaar, Sports Illustrated and Victoria\\'s Secret.[5] A long time vegetarian, an advocate for organic lifestyle choices and natural healthcare. She co-founded Tsi-La Organics, a \"Green Luxury\" company that creates and sells vegan, organic perfume and skin care products.[6]. She has appeared on many magazine covers and has been featured in several professional photobooks, Peter Lindbergh Selected Work, the cover of Juergen Teller By Juergen Teller, Helmut Newton Pages From The Glossies, and Donna Karan\\'s book A Journey Of A Woman 20 Years.[7] In 1997 she posed for the Pirelli Calendar with Richard Avedon.. Morton has also appeared in several music videos.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:41 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I will search for information about Terry Richardson to determine his age.', action=\"search(entity='Terry Richardson')\"), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Terry Richardson'}), observation=\"Terrence Richardson (born August 14, 1965) is an American fashion and portrait photographer. He has shot advertising campaigns for Marc Jacobs, Aldo, Supreme, Sisley, Tom Ford, and Yves Saint Laurent among others, and also done work for magazines such as Rolling Stone, GQ, Vogue, Vanity Fair, Harper's Bazaar, i-D, and Vice.. Since 2001, Richardson has been accused by multiple models of sexual misconduct.[2][3][4][5] In 2017, brands and magazines that had worked with Richardson in the past began distancing themselves from him, and said they would no longer employ him.[6] He has not actively worked as a photographer since 2018.[7]. Richardson was born in New York City, the son of Norma Kessler, an actress,[8][9] and Bob Richardson, a fashion photographer who struggled with schizophrenia and drug abuse.[10] Richardson's father was Irish Catholic and his mother is Jewish.[11] Following the divorce of his parents, Richardson moved to Woodstock, New York, with his mother and stepfather, English guitarist Jackie Lomax.[8] Richardson later moved to the Hollywood neighborhood of Los Angeles, where he attended Hollywood High School.[12]. He moved with his mother to Ojai, California, where he attended Nordhoff High School, when he was 16.[13] Richardson originally wanted to be a punk rock musician rather than a photographer.[13] He played bass guitar in the punk rock band The Invisible Government for four years.[14] He played bass for a variety of other punk bands in Southern California including Signal Street Alcoholics, Doggy Style, Baby Fist and Middle Finger.[8][15].\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:42 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"Annie Morton was born on October 8, 1970, making her older than Terry Richardson who was born on August 14, 1965.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Annie Morton was born on October 8, 1970, making her older than Terry Richardson who was born on August 14, 1965.'}), observation='Annie Morton was born on October 8, 1970, making her older than Terry Richardson who was born on August 14, 1965.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:42 - [react.py:299:call] - answer:\n",
+ " Annie Morton was born on October 8, 1970, making her older than Terry Richardson who was born on August 14, 1965.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:42 - [2823432734.py:31:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: terry richardson, pred answer: annie morton was born on october 8 1970 making her older than terry richardson who was born on august 14 1965\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:00:42 - [react.py:285:call] - input_query: Are Local H and For Against both from the United States?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:43 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='Checking the origin of the bands Local H and For Against.', action='search(entity=\"Local H\")'), function=Function(thought=None, name='search', args=[], kwargs={'entity': 'Local H'}), observation=\"Local H is an American rock band originally formed by guitarist and vocalist Scott Lucas, bassist Matt Garcia, drummer Joe Daniels, and lead guitarist John Sparkman in Zion, Illinois in 1990. The members all met in high school in 1987 and founded Local H three years later. After Sparkman's departure in 1991 and Garcia's departure in 1993, Local H continued as an unorthodox two-piece setup.. Local H signed a record contract with Island Records in 1994, where they would go on to release three albums. The band's debut album, Ham Fisted (1995), was not a success and the band was nearly dropped, but the band remained on the label long enough to release their second album As Good as Dead (1996).\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:00:44 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I have finished the task.', action='finish(answer=\"Yes, Local H is from the United States. Now, let\\'s check For Against\\'s origin.\"'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"Yes, Local H is from the United States. Now, let's check For Against's origin.\"}), observation=\"Yes, Local H is from the United States. Now, let's check For Against's origin.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:00:44 - [react.py:299:call] - answer:\n",
+ " Yes, Local H is from the United States. Now, let's check For Against's origin.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:00:44 - [2823432734.py:31:] - No. 10, question: Are Local H and For Against both from the United States?, ground truth: yes, pred answer: yes local h is from united states now lets check for againsts origin\u001b[0m\n",
+ "EM = EvaluationResult(avg_score=0.0, per_item_scores=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], additional_info=None), FM = EvaluationResult(avg_score=0.5, per_item_scores=[0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0], additional_info=None), average time = 4.769389891624451\n"
+ ]
+ }
+ ],
+ "source": [
+ "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
+ "\n",
+ "# set up evaluation type\n",
+ "EM_evaluator = AnswerMatchAcc(type=\"exact_match\")\n",
+ "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n",
+ "\n",
+ "agent = ReActAgent(\n",
+ " tools=tools,\n",
+ " max_steps=7,\n",
+ " model_client=OpenAIClient(),\n",
+ " model_kwargs=gpt_model_kwargs,\n",
+ " # preset_prompt_kwargs=preset_prompt_kwargs,\n",
+ ")\n",
+ "\n",
+ "num_questions = 10\n",
+ "gt_answers = []\n",
+ "pred_answers = []\n",
+ "start_time = time.time()\n",
+ "for i in range(num_questions):\n",
+ " question = val_dataset[i][\"question\"]\n",
+ " gt_answer = normalize_answer(\n",
+ " val_dataset[i][\"answer\"]\n",
+ " ) # normalize the ground truth answer\n",
+ " gt_answers.append(gt_answer)\n",
+ "\n",
+ " # get the agent's response\n",
+ " pred_answer = agent(question)\n",
+ " pred_answer = normalize_answer(pred_answer)\n",
+ " pred_answers.append(pred_answer)\n",
+ "\n",
+ " printc(\n",
+ " f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n",
+ " color=\"yellow\",\n",
+ " )\n",
+ "\n",
+ "end_time = time.time()\n",
+ "\n",
+ "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
+ "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
+ "avg_time = (end_time - start_time) / num_questions\n",
+ "\n",
+ "print(f\"EM = {em}, FM = {fm}, average time = {avg_time}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the run recorded above, the EM is 0.0 and the FM is 0.5, and each query takes about 4.8s on average. (The performance also depends on whether the Wikipedia queries connect successfully.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "What if we use a simple LLM to answer these questions? To test this, we just need to remove the tools. The agent has a built-in ``llm_tool`` and ``finish`` that automatically handle the query. ``llm_tool`` uses the same model as the agent. We can't add the examples because they would mislead the agent into calling non-existent tools."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[31m2024-12-19 14:01:27 - [react.py:285:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:29 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='Checking the nationality of Scott Derrickson and Ed Wood.', action=\"llm_tool(input='What is the nationality of Scott Derrickson?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'What is the nationality of Scott Derrickson?'}), observation='Scott Derrickson is an American filmmaker.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:30 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='Checking the nationality of Ed Wood.', action=\"llm_tool(input='What is the nationality of Ed Wood?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'What is the nationality of Ed Wood?'}), observation='Ed Wood was an American filmmaker.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:31 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought='Confirming if Scott Derrickson and Ed Wood were of the same nationality.', action=\"finish(answer='Yes, both Scott Derrickson and Ed Wood were American filmmakers, so they were of the same nationality.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Yes, both Scott Derrickson and Ed Wood were American filmmakers, so they were of the same nationality.'}), observation='Yes, both Scott Derrickson and Ed Wood were American filmmakers, so they were of the same nationality.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:31 - [react.py:299:call] - answer:\n",
+ " Yes, both Scott Derrickson and Ed Wood were American filmmakers, so they were of the same nationality.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:31 - [3974127232.py:27:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: yes both scott derrickson and ed wood were american filmmakers so they were of same nationality\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:31 - [react.py:285:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:32 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='The user is asking about the government position held by the woman who portrayed Corliss Archer in the film Kiss and Tell.', action=\"llm_tool(input='government position held by the woman who portrayed Corliss Archer in the film Kiss and Tell')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'government position held by the woman who portrayed Corliss Archer in the film Kiss and Tell'}), observation='The government position held by the woman who portrayed Corliss Archer in the film \"Kiss and Tell\" is Shirley Temple. She served as the United States Ambassador to Ghana and later to Czechoslovakia.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:33 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='The user is asking about the government position held by the woman who portrayed Corliss Archer in the film Kiss and Tell.', action=\"finish(answer='Shirley Temple served as the United States Ambassador to Ghana and later to Czechoslovakia.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Shirley Temple served as the United States Ambassador to Ghana and later to Czechoslovakia.'}), observation='Shirley Temple served as the United States Ambassador to Ghana and later to Czechoslovakia.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:33 - [react.py:299:call] - answer:\n",
+ " Shirley Temple served as the United States Ambassador to Ghana and later to Czechoslovakia.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:33 - [3974127232.py:27:] - No. 2, question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: shirley temple served as united states ambassador to ghana and later to czechoslovakia\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:33 - [react.py:285:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:36 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='Identifying the science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species.', action=\"llm_tool(input='Science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species'}), observation='I recommend checking out the \"Lorien Legacies\" series by Pittacus Lore. This science fiction young adult series follows the story of nine alien teenagers who escape their home planet, Lorien, which has been invaded by the evil Mogadorians. Each book in the series focuses on one of the teenagers and their journey to save Earth and their own species. Additionally, there are companion books that delve into the histories of the enslaved worlds and alien species featured in the main series.')\n",
+ "_______\n",
+ "\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Error at parsing JSON string: Got invalid JSON object with yaml.safe_load. Error: while parsing a flow mapping\n",
+ " in \"\", line 1, column 1:\n",
+ " {\n",
+ " ^\n",
+ "expected ',' or '}', but got ''\n",
+ " in \"\", line 4, column 61:\n",
+ " ... ='I recommend checking out the \"Lorien Legacies\" series by Pitta ... \n",
+ " ^. Got JSON string: {\n",
+ " \"question\": null,\n",
+ " \"thought\": \"Providing the information about the science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species.\",\n",
+ " \"action\": \"finish(answer='I recommend checking out the \"Lorien Legacies\" series by Pittacus Lore. This science fiction young adult series follows the story of nine alien teenagers who escape their home planet, Lorien, which has been invaded by the evil Mogadorians. Each book in the series focuses on one of the teenagers and their journey to save Earth and their own species. Additionally, there are companion books that delve into the histories of the enslaved worlds and alien species featured in the main series.')\"\n",
+ "}\n",
+ "Error in parsing JSON to JSON: Error: Got invalid JSON object with yaml.safe_load. Error: while parsing a flow mapping\n",
+ " in \"\", line 1, column 1:\n",
+ " {\n",
+ " ^\n",
+ "expected ',' or '}', but got ''\n",
+ " in \"\", line 4, column 61:\n",
+ " ... ='I recommend checking out the \"Lorien Legacies\" series by Pitta ... \n",
+ " ^. Got JSON string: {\n",
+ " \"question\": null,\n",
+ " \"thought\": \"Providing the information about the science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species.\",\n",
+ " \"action\": \"finish(answer='I recommend checking out the \"Lorien Legacies\" series by Pittacus Lore. This science fiction young adult series follows the story of nine alien teenagers who escape their home planet, Lorien, which has been invaded by the evil Mogadorians. Each book in the series focuses on one of the teenagers and their journey to save Earth and their own species. Additionally, there are companion books that delve into the histories of the enslaved worlds and alien species featured in the main series.')\"\n",
+ "}\n",
+ "Error processing the output processors: Error: Got invalid JSON object with yaml.safe_load. Error: while parsing a flow mapping\n",
+ " in \"\", line 1, column 1:\n",
+ " {\n",
+ " ^\n",
+ "expected ',' or '}', but got ''\n",
+ " in \"\", line 4, column 61:\n",
+ " ... ='I recommend checking out the \"Lorien Legacies\" series by Pitta ... \n",
+ " ^. Got JSON string: {\n",
+ " \"question\": null,\n",
+ " \"thought\": \"Providing the information about the science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species.\",\n",
+ " \"action\": \"finish(answer='I recommend checking out the \"Lorien Legacies\" series by Pittacus Lore. This science fiction young adult series follows the story of nine alien teenagers who escape their home planet, Lorien, which has been invaded by the evil Mogadorians. Each book in the series focuses on one of the teenagers and their journey to save Earth and their own species. Additionally, there are companion books that delve into the histories of the enslaved worlds and alien species featured in the main series.')\"\n",
+ "}\n",
+ "Error planning step 2: Error: Got invalid JSON object with yaml.safe_load. Error: while parsing a flow mapping\n",
+ " in \"\", line 1, column 1:\n",
+ " {\n",
+ " ^\n",
+ "expected ',' or '}', but got ''\n",
+ " in \"\", line 4, column 61:\n",
+ " ... ='I recommend checking out the \"Lorien Legacies\" series by Pitta ... \n",
+ " ^. Got JSON string: {\n",
+ " \"question\": null,\n",
+ " \"thought\": \"Providing the information about the science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species.\",\n",
+ " \"action\": \"finish(answer='I recommend checking out the \"Lorien Legacies\" series by Pittacus Lore. This science fiction young adult series follows the story of nine alien teenagers who escape their home planet, Lorien, which has been invaded by the evil Mogadorians. Each book in the series focuses on one of the teenagers and their journey to save Earth and their own species. Additionally, there are companion books that delve into the histories of the enslaved worlds and alien species featured in the main series.')\"\n",
+ "}\n",
+ "Error running step 3: Error rendering Jinja2 template: 'None' has no attribute 'thought'\n",
+ "Error running step 4: Error rendering Jinja2 template: 'None' has no attribute 'thought'\n",
+ "Error running step 5: Error rendering Jinja2 template: 'None' has no attribute 'thought'\n",
+ "Error running step 6: Error rendering Jinja2 template: 'None' has no attribute 'thought'\n",
+ "Error running step 7: Error rendering Jinja2 template: 'None' has no attribute 'thought'\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2024-12-19 14:01:37 - [react.py:299:call] - answer:\n",
+ " Error planning step 2: Error: Got invalid JSON object with yaml.safe_load. Error: while parsing a flow mapping\n",
+ " in \"\", line 1, column 1:\n",
+ " {\n",
+ " ^\n",
+ "expected ',' or '}', but got ''\n",
+ " in \"\", line 4, column 61:\n",
+ " ... ='I recommend checking out the \"Lorien Legacies\" series by Pitta ... \n",
+ " ^. Got JSON string: {\n",
+ " \"question\": null,\n",
+ " \"thought\": \"Providing the information about the science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species.\",\n",
+ " \"action\": \"finish(answer='I recommend checking out the \"Lorien Legacies\" series by Pittacus Lore. This science fiction young adult series follows the story of nine alien teenagers who escape their home planet, Lorien, which has been invaded by the evil Mogadorians. Each book in the series focuses on one of the teenagers and their journey to save Earth and their own species. Additionally, there are companion books that delve into the histories of the enslaved worlds and alien species featured in the main series.')\"\n",
+ "}\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:37 - [3974127232.py:27:] - No. 3, question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: error planning step 2 error got invalid json object with yamlsafeload error while parsing flow mapping in unicode string line 1 column 1 expected or but got scalar in unicode string line 4 column 61 i recommend checking out lorien legacies series by pitta got json string question null thought providing information about science fantasy young adult series with companion books narrating stories of enslaved worlds and alien species action finishansweri recommend checking out lorien legacies series by pittacus lore this science fiction young adult series follows story of nine alien teenagers who escape their home planet lorien which has been invaded by evil mogadorians each book in series focuses on one of teenagers and their journey to save earth and their own species additionally there are companion books that delve into histories of enslaved worlds and alien species featured in main series\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:37 - [react.py:285:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:39 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='Checking the locations of Laleli Mosque and Esma Sultan Mansion.', action=\"llm_tool(input='Are Laleli Mosque and Esma Sultan Mansion in the same neighborhood?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Are Laleli Mosque and Esma Sultan Mansion in the same neighborhood?'}), observation='No, Laleli Mosque and Esma Sultan Mansion are not in the same neighborhood. Laleli Mosque is located in the Laleli district of Istanbul, while Esma Sultan Mansion is located in the Ortaköy district. They are in different neighborhoods within Istanbul.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:40 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='Confirming the locations of Laleli Mosque and Esma Sultan Mansion.', action=\"finish(answer='No, Laleli Mosque is in the Laleli district of Istanbul, and Esma Sultan Mansion is in the Ortaköy district. They are in different neighborhoods within Istanbul.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'No, Laleli Mosque is in the Laleli district of Istanbul, and Esma Sultan Mansion is in the Ortaköy district. They are in different neighborhoods within Istanbul.'}), observation='No, Laleli Mosque is in the Laleli district of Istanbul, and Esma Sultan Mansion is in the Ortaköy district. They are in different neighborhoods within Istanbul.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:40 - [react.py:299:call] - answer:\n",
+ " No, Laleli Mosque is in the Laleli district of Istanbul, and Esma Sultan Mansion is in the Ortaköy district. They are in different neighborhoods within Istanbul.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:40 - [3974127232.py:27:] - No. 4, question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque is in laleli district of istanbul and esma sultan mansion is in ortaköy district they are in different neighborhoods within istanbul\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:40 - [react.py:285:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:42 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought=\"The user is asking about the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action=\"llm_tool(input='director of the romantic comedy Big Stone Gap and New York city it is based in')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'director of the romantic comedy Big Stone Gap and New York city it is based in'}), observation='\"Big Stone Gap\" is a romantic comedy film directed by Adriana Trigiani. The story is set in a small town in Virginia, not New York City. The film follows the life of a woman named Ave Maria Mulligan, played by Ashley Judd, as she navigates love, family, and community in the charming town of Big Stone Gap.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:43 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought=\"The user is asking about the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action=\"finish(answer='The director of the romantic comedy movie Big Stone Gap is Adriana Trigiani. The movie is set in a small town in Virginia, not New York City.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The director of the romantic comedy movie Big Stone Gap is Adriana Trigiani. The movie is set in a small town in Virginia, not New York City.'}), observation='The director of the romantic comedy movie Big Stone Gap is Adriana Trigiani. The movie is set in a small town in Virginia, not New York City.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:43 - [react.py:299:call] - answer:\n",
+ " The director of the romantic comedy movie Big Stone Gap is Adriana Trigiani. The movie is set in a small town in Virginia, not New York City.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:43 - [3974127232.py:27:] - No. 5, question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: director of romantic comedy movie big stone gap is adriana trigiani movie is set in small town in virginia not new york city\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:43 - [react.py:285:call] - input_query: 2014 S/S is the debut album of a South Korean boy group that was formed by who?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:44 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='The user is asking about the debut album of a South Korean boy group and who formed it.', action=\"llm_tool(input='2014 S/S is the debut album of a South Korean boy group that was formed by who?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': '2014 S/S is the debut album of a South Korean boy group that was formed by who?'}), observation='2014 S/S is the debut album of the South Korean boy group WINNER, which was formed by YG Entertainment through the reality survival program WIN: Who is Next.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:45 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought=\"Providing the information about the debut album and the group's formation.\", action=\"finish(answer='2014 S/S is the debut album of the South Korean boy group WINNER, which was formed by YG Entertainment through the reality survival program WIN: Who is Next.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': '2014 S/S is the debut album of the South Korean boy group WINNER, which was formed by YG Entertainment through the reality survival program WIN: Who is Next.'}), observation='2014 S/S is the debut album of the South Korean boy group WINNER, which was formed by YG Entertainment through the reality survival program WIN: Who is Next.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:45 - [react.py:299:call] - answer:\n",
+ " 2014 S/S is the debut album of the South Korean boy group WINNER, which was formed by YG Entertainment through the reality survival program WIN: Who is Next.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:45 - [3974127232.py:27:] - No. 6, question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?, ground truth: yg entertainment, pred answer: 2014 ss is debut album of south korean boy group winner which was formed by yg entertainment through reality survival program win who is next\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:45 - [react.py:285:call] - input_query: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:48 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='The user is asking about a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.', action=\"llm_tool(input='Aladin stage name consultant organizations performance improve')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Aladin stage name consultant organizations performance improve'}), observation='It sounds like you are looking for advice on how to improve the performance of organizations that provide stage name consulting services, like Aladin. Here are some suggestions to help enhance their performance:\\n\\n1. Develop a strong brand identity: Help organizations establish a unique and memorable brand identity that sets them apart from competitors.\\n\\n2. Enhance customer experience: Focus on providing exceptional customer service and personalized consultations to ensure client satisfaction.\\n\\n3. Expand service offerings: Consider offering additional services such as social media management, image consulting, or talent development to attract a wider range of clients.\\n\\n4. Invest in marketing and promotion: Utilize digital marketing strategies, social media platforms, and partnerships with influencers to increase visibility and attract new clients.\\n\\n5. Professional development: Provide ongoing training and development opportunities for consultants to stay updated on industry trends and best practices.\\n\\nBy implementing these strategies, organizations offering stage name consulting services can improve their performance and attract more clients.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:49 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='The user is looking for information about a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.', action='finish(answer=\"Aladin is known by his stage name and helped organizations improve their performance as a consultant.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'Aladin is known by his stage name and helped organizations improve their performance as a consultant.'}), observation='Aladin is known by his stage name and helped organizations improve their performance as a consultant.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:49 - [react.py:299:call] - answer:\n",
+ " Aladin is known by his stage name and helped organizations improve their performance as a consultant.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:49 - [3974127232.py:27:] - No. 7, question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?, ground truth: eenasul fateh, pred answer: aladin is known by his stage name and helped organizations improve their performance as consultant\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:49 - [react.py:285:call] - input_query: The arena where the Lewiston Maineiacs played their home games can seat how many people?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:50 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='The user is asking about the seating capacity of the arena where the Lewiston Maineiacs played their home games.', action=\"llm_tool(input='The arena where the Lewiston Maineiacs played their home games can seat how many people?')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'The arena where the Lewiston Maineiacs played their home games can seat how many people?'}), observation='The arena where the Lewiston Maineiacs played their home games was the Androscoggin Bank Colisée, which had a seating capacity of approximately 3,677 people for hockey games.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:51 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='I have finished the task.', action=\"finish(answer='The Androscoggin Bank Colisée, where the Lewiston Maineiacs played their home games, had a seating capacity of approximately 3,677 people for hockey games.')\"), function=Function(thought=None, name='finish', args=[], kwargs={'answer': 'The Androscoggin Bank Colisée, where the Lewiston Maineiacs played their home games, had a seating capacity of approximately 3,677 people for hockey games.'}), observation='The Androscoggin Bank Colisée, where the Lewiston Maineiacs played their home games, had a seating capacity of approximately 3,677 people for hockey games.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:51 - [react.py:299:call] - answer:\n",
+ " The Androscoggin Bank Colisée, where the Lewiston Maineiacs played their home games, had a seating capacity of approximately 3,677 people for hockey games.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:51 - [3974127232.py:27:] - No. 8, question: The arena where the Lewiston Maineiacs played their home games can seat how many people?, ground truth: 3677 seated, pred answer: androscoggin bank colisée where lewiston maineiacs played their home games had seating capacity of approximately 3677 people for hockey games\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:51 - [react.py:285:call] - input_query: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:52 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='Identifying the ages of Annie Morton and Terry Richardson.', action=\"llm_tool(input='Annie Morton age') (str)\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Annie Morton age'}), observation=\"I'm sorry, but I cannot provide personal information such as age without more context or details. How can I assist you further?\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:54 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='Comparing the ages of Annie Morton and Terry Richardson to determine who is older.', action=\"llm_tool(input='Terry Richardson age') (str)\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Terry Richardson age'}), observation=\"I'm sorry, but I cannot provide real-time information on individuals. Terry Richardson is a well-known American fashion photographer, but I do not have access to his current age.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:54 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought=\"Unfortunately, I couldn't retrieve the ages of Annie Morton and Terry Richardson.\", action='finish(answer=\"I don\\'t have access to the specific ages of Annie Morton and Terry Richardson.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"I don't have access to the specific ages of Annie Morton and Terry Richardson.\"}), observation=\"I don't have access to the specific ages of Annie Morton and Terry Richardson.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:54 - [react.py:299:call] - answer:\n",
+ " I don't have access to the specific ages of Annie Morton and Terry Richardson.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:54 - [3974127232.py:27:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: terry richardson, pred answer: i dont have access to specific ages of annie morton and terry richardson\u001b[0m\n",
+ "\u001b[31m2024-12-19 14:01:54 - [react.py:285:call] - input_query: Are Local H and For Against both from the United States?\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:56 - [react.py:265:_run_one_step] - Step 1: \n",
+ "StepOutput(step=1, action=FunctionExpression(question=None, thought='Checking the origin of the bands Local H and For Against.', action='llm_tool(input=\"Local H origin\")'), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'Local H origin'}), observation='Local H is an American rock band hailing from Zion, Illinois. The band was formed in 1987 by Scott Lucas (vocals, guitar, bass) and Joe Daniels (drums). They are known for their energetic live performances and their blend of alternative rock, grunge, and punk influences.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:58 - [react.py:265:_run_one_step] - Step 2: \n",
+ "StepOutput(step=2, action=FunctionExpression(question=None, thought='Checking the origin of the band For Against.', action=\"llm_tool(input='For Against origin')\"), function=Function(thought=None, name='llm_tool', args=[], kwargs={'input': 'For Against origin'}), observation='Sure, here are some arguments for and against the concept of origin:\\n\\nFor:\\n1. Understanding the origin of something can provide valuable insights into its nature and purpose.\\n2. Knowing the origin of a product or idea can help in determining its authenticity and quality.\\n3. Studying origins can help in tracing the development and evolution of various phenomena.\\n\\nAgainst:\\n1. Focusing too much on origin can lead to oversimplification and overlook the complexity of a subject.\\n2. The origin of something may not always be clear or easily traceable, leading to speculation and uncertainty.\\n3. Placing too much emphasis on origin can limit our understanding and appreciation of the present and future aspects of a subject.')\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[34m2024-12-19 14:01:59 - [react.py:265:_run_one_step] - Step 3: \n",
+ "StepOutput(step=3, action=FunctionExpression(question=None, thought=\"Combining the origins of Local H and For Against to answer the user's query.\", action='finish(answer=\"Yes, Local H is from the United States (Zion, Illinois) and For Against\\'s origin is not specified in the response.\")'), function=Function(thought=None, name='finish', args=[], kwargs={'answer': \"Yes, Local H is from the United States (Zion, Illinois) and For Against's origin is not specified in the response.\"}), observation=\"Yes, Local H is from the United States (Zion, Illinois) and For Against's origin is not specified in the response.\")\n",
+ "_______\n",
+ "\u001b[0m\n",
+ "\u001b[32m2024-12-19 14:01:59 - [react.py:299:call] - answer:\n",
+ " Yes, Local H is from the United States (Zion, Illinois) and For Against's origin is not specified in the response.\u001b[0m\n",
+ "\u001b[33m2024-12-19 14:01:59 - [3974127232.py:27:] - No. 10, question: Are Local H and For Against both from the United States?, ground truth: yes, pred answer: yes local h is from united states zion illinois and for againsts origin is not specified in response\u001b[0m\n",
+ "EM = EvaluationResult(avg_score=0.0, per_item_scores=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], additional_info=None), FM = EvaluationResult(avg_score=0.5, per_item_scores=[1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], additional_info=None), average time = 3.1863945960998534\n"
+ ]
+ }
+ ],
+ "source": [
+ "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
+ "\n",
+ "# set up evaluation type\n",
+ "EM_evaluator = AnswerMatchAcc(type=\"exact_match\")\n",
+ "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n",
+ "\n",
+ "agent = ReActAgent(\n",
+ " max_steps=7, model_client=OpenAIClient(), model_kwargs=gpt_model_kwargs\n",
+ ")\n",
+ "\n",
+ "num_questions = 10\n",
+ "gt_answers = []\n",
+ "pred_answers = []\n",
+ "start_time = time.time()\n",
+ "for i in range(num_questions):\n",
+ " question = val_dataset[i][\"question\"]\n",
+ " gt_answer = normalize_answer(\n",
+ " val_dataset[i][\"answer\"]\n",
+ " ) # normalize the ground truth answer\n",
+ " gt_answers.append(gt_answer)\n",
+ "\n",
+ " # get the agent's response\n",
+ " pred_answer = agent(question)\n",
+ " pred_answer = normalize_answer(pred_answer)\n",
+ " pred_answers.append(pred_answer)\n",
+ "\n",
+ " printc(\n",
+ " f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n",
+ " color=\"yellow\",\n",
+ " )\n",
+ "\n",
+ "end_time = time.time()\n",
+ "\n",
+ "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
+ "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n",
+ "avg_time = (end_time - start_time) / num_questions\n",
+ "\n",
+ "print(f\"EM = {em}, FM = {fm}, average time = {avg_time}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Without the tools and examples, EM=0 and FM=0.4. We saw hallucinations and nonsense:\n",
+ "\n",
+ "2024-06-15 23:17:04 - [3230041225.py:26:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: ``yes``, pred answer: ``no scott derrickson and ed wood were not of same nationality scott derrickson is american while ed wood was also american``\n",
+ "\n",
+ "2024-06-15 23:18:16 - [3230041225.py:26:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth:`` terry richardson``, pred answer: ``who is older annie morton or terry richardson``\n",
+ "\n",
+ "Therefore, using the ReAct agent outperforms the base LLM.\n",
+ "Meanwhile, the ``LightRAG ReAct agent`` reaches EM=0.3 on 10 questions."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 7. Future Improvement"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# TODO:\n",
+ "# 1. advanced, add history to react\n",
+ "# 2. add training, few shot\n",
+ "# 3. llm as judge\n",
+ "# 4. add picture\n",
+ "# 5. better json handling, we need to store the answer output"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "my-project-kernel",
+ "language": "python",
+ "name": "my-project-kernel"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/use_cases/classification/train.py b/use_cases/classification/train.py
index 0bdbd562..68a71fdb 100644
--- a/use_cases/classification/train.py
+++ b/use_cases/classification/train.py
@@ -11,6 +11,7 @@
gpt_3_model,
gpt_4o_model,
)
+from adalflow.core.generator import BackwardPassSetup
class TrecClassifierAdal(adal.AdalComponent):
@@ -26,7 +27,7 @@ def __init__(
eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
loss_fn = adal.EvalFnToTextLoss(
eval_fn=eval_fn,
- eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
+ eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0. When the LLM prediction failed with format parsing which results with errors, we set y_pred = -1",
)
super().__init__(
task=task,
@@ -51,8 +52,8 @@ def prepare_eval(
def prepare_loss(
self, sample: TRECExtendedData, y_pred: adal.Parameter, *args, **kwargs
) -> Tuple[Callable[..., Any], Dict]:
- full_response = y_pred.full_response
- y_label = -1
+ full_response = y_pred.data
+ y_label = -1 # default value for failed prediction
if (
full_response
and full_response.data is not None
@@ -67,7 +68,11 @@ def prepare_loss(
eval_input=sample.class_name,
requires_opt=False,
)
- return self.loss_fn, {"kwargs": {"y": y_pred, "y_gt": y_gt}}
+ return self.loss_fn, {
+ "kwargs": {"y": y_pred, "y_gt": y_gt},
+ "id": sample.id,
+ "gt": y_gt.eval_input,
+ }
def train(
@@ -81,6 +86,9 @@ def train(
strategy="constrained",
optimization_order="sequential",
debug=False,
+ seed=None,
+ tg: bool = False,
+ max_proposals_per_step: int = 5,
):
# TODO: ensure the teacher prompt gets updated with the new model
adal_component = TrecClassifierAdal(
@@ -90,6 +98,12 @@ def train(
backward_engine_model_config=gpt_4o_model,
teacher_model_config=gpt_4o_model,
)
+ backward_pass_setup = None
+ if tg:
+ backward_pass_setup = BackwardPassSetup(
+ all_pred_at_once=False,
+ compute_grad_for_errors_only=False,
+ )
print(adal_component)
trainer = adal.Trainer(
train_batch_size=train_batch_size,
@@ -103,50 +117,67 @@ def train(
weighted_sampling=True,
optimization_order=optimization_order,
exclude_input_fields_from_bootstrap_demos=False,
+ max_proposals_per_step=max_proposals_per_step,
)
+ trainer.set_random_seed(seed)
print(trainer)
train_dataset, val_dataset, test_dataset = load_datasets()
- trainer.fit(
+ ckpt, _ = trainer.fit(
train_dataset=train_dataset,
- val_dataset=test_dataset,
- # val_dataset=val_dataset,
- # test_dataset=test_dataset,
+ val_dataset=val_dataset,
+ test_dataset=test_dataset,
debug=debug,
- resume_from_ckpt="/Users/liyin/.adalflow/ckpt/TrecClassifierAdal/constrained_max_steps_12_5d1bf_run_1.json",
+ backward_pass_setup=backward_pass_setup,
+ # resume_from_ckpt="/Users/liyin/.adalflow/ckpt/TrecClassifierAdal/constrained_max_steps_12_5d1bf_run_1.json",
)
+ return ckpt
if __name__ == "__main__":
# TODO:
# Evaluating step(6): 0.7333 across 30 samples, Max potential: 0.7778: 83%|▊| 30/36 [00:08<00:01,
# Optimizer revert: 0.7096774193548387 <= 0.7777777777777778
- train(
+ import json
+
+ import random
+
+ random.seed(2025)
+ # np.random.seed(2025) # Set NumPy random seed
+
+ # make the strategy configurable in the script
+ import argparse
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--strategy", type=str, default="constrained")
+ parser.add_argument("--use_tg", action="store_true")
+ parser.add_argument("--max_proposals_per_step", type=int, default=5)
+ parser.add_argument(
+ "output_path", nargs="?", help="File path to save the checkpoint"
+ )
+
+ args = parser.parse_args()
+
+ set_strategy = args.strategy
+ set_output_path = args.output_path
+ use_tg = args.use_tg
+ max_proposals_per_step = args.max_proposals_per_step
+
+ ckpt = train(
**gpt_3_model,
- debug=False,
+ debug=True,
max_steps=12,
- strategy="constrained",
+ strategy=set_strategy,
optimization_order="sequential",
- )
- # val 0.694 -> 0.833, #test 0.8472 -> 0.833, adding more shots does not help
- # NOTE: raw: 40, bootstrap: 4, max_steps: 8, strategy: random, val: 86.1, test: 86.8 (+4.2% compared with dspy)
- # NOTE: train task without output format: val: 0.67->0.805, test: 0.805-> 0.896 # best performing model (zero-shot)
- # NOTE: train with without output format, use new class_name: constrained_max_steps_12_bac8d_run_1.json
- # val: 0.77.8, test: 0.86.8 #constrained_max_steps_12_138d9_run_1.json
-
- # REsume from the above, continue another 12 steps: val: 77.78% tets: 86.81%
- # result from the above, use bootstrap 1 shot: test -> 88.19% #constrained_max_steps_12_2ffa7_run_4.json (with input)
- # result from the above, use bootstrap 1 shot: no improvement, 86.81% #constrained_max_steps_12_2ffa7_run_5.json (with only rational and answers)
- # result from above, use bootstrap 2 shots: use input:no improvement
- # bootstrap is not helpful
- # 40 shots, 1 bootstrap, continue from last best, 86.1 val, 90.28% tes
- # 40 shots, resume, no improvment
- # continue from last best, 3 bootstrap, 83.3 val, 86.1 test (only rational)
- # continue from last best, 3 bootstrap, (both input and rational)86.1 val, 82.64 test (not really better)
- # NOTE:
- # continue from last best, 1 bootstrap, (both input and rational)86.1 val, 86.1 test (not really better)
- # TrecClassifierAdal/constrained_max_steps_12_2ffa7_run_2.json
-
-
-# theory: all few-shots demo or instruction, all so that the llm can reason better. Once it reches to its limits, no more shots can help or further instruction can.
-# there might be a saturation point!!!
+ seed=2025,
+ tg=use_tg,
+ max_proposals_per_step=max_proposals_per_step,
+ ) # val 0.694 -> 0.833, #test 0.8472 -> 0.833, adding more shots does not help
+
+ if set_output_path:
+ with open(set_output_path, "w") as f:
+ json.dump({"ckpt": ckpt}, f)
+ print(f"Checkpoint saved to {set_output_path}")
+ else:
+ print("No file path provided for saving the checkpoint.")
diff --git a/use_cases/classification/train_string_output.py b/use_cases/classification/train_string_output.py
index 9ecdef27..45fe5bcf 100644
--- a/use_cases/classification/train_string_output.py
+++ b/use_cases/classification/train_string_output.py
@@ -7,7 +7,7 @@
from use_cases.classification.data import load_datasets, TRECExtendedData
from adalflow.eval.answer_match_acc import AnswerMatchAcc
-from LightRAG.use_cases.config import (
+from use_cases.config import (
gpt_3_model,
gpt_4o_model,
)
diff --git a/use_cases/classification/trec_task_structured_output.py b/use_cases/classification/trec_task_structured_output.py
index eb5333cd..56014cc6 100644
--- a/use_cases/classification/trec_task_structured_output.py
+++ b/use_cases/classification/trec_task_structured_output.py
@@ -60,7 +60,7 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
# data="You are a classifier. Given a question, classify it into one of the following classes based on what the question is seeking:\n\nFormat: class_index. class_name, class_description\n\n0. ABBR, Abbreviation\n1. ENTY, Entity\n2. DESC, Description and abstract concept\n3. HUM, Human being\n4. LOC, Location\n5. NUM, Numeric value\n\nPay special attention to questions about entities versus descriptions, as well as those asking for specific terms or people. Do not try to answer the question:",
# best # data="You are a classifier. For each question given, classify it into one of the following classes:\n\nFormat: class_index. class_name, class_description\n\n0. ABBR, Abbreviation (includes initials)\n1. ENTY, Entity (includes products, languages, objects, etc.)\n2. DESC, Description and abstract concept (includes explanations)\n3. HUM, Human being (includes individuals, groups, etc.)\n4. LOC, Location (includes addresses, places, etc.)\n5. NUM, Numeric value (includes distances, dates, ages, etc.)\n\n- Focus on identifying the primary subject of the question and classifying based on what is being explicitly asked for.",
role_desc="Task description",
- requires_opt=False,
+ requires_opt=True,
param_type=adal.ParameterType.PROMPT,
),
"output_format_str": adal.Parameter(
@@ -70,12 +70,12 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
param_type=adal.ParameterType.PROMPT,
),
# NOTE: 88.19%
- "few_shot_demos": adal.Parameter(
- data=None,
- requires_opt=True,
- role_desc="Few shot examples to help the model",
- param_type=adal.ParameterType.DEMOS,
- ),
+ # "few_shot_demos": adal.Parameter(
+ # data=None,
+ # requires_opt=True,
+ # role_desc="Few shot examples to help the model",
+ # param_type=adal.ParameterType.DEMOS,
+ # ),
}
self.llm = adal.Generator(
@@ -96,7 +96,7 @@ def _prepare_input(self, question: str):
prompt_kwargs = {
"input_str": adal.Parameter(
data=input_str,
- requires_opt=True,
+ requires_opt=False,
role_desc="input to the LLM",
param_type=adal.ParameterType.INPUT,
)
@@ -108,6 +108,8 @@ def call(
) -> Union[adal.GeneratorOutput, adal.Parameter]:
prompt_kwargs = self._prepare_input(question)
output = self.llm(prompt_kwargs=prompt_kwargs, id=id)
+ if isinstance(output, adal.Parameter):
+ output.data_in_prompt = lambda x: x.data.raw_response
return output
diff --git a/use_cases/config.py b/use_cases/config.py
index 895ed097..440965a4 100644
--- a/use_cases/config.py
+++ b/use_cases/config.py
@@ -15,7 +15,20 @@
gpt_3_model = {
"model_client": OpenAIClient(input_type="text"),
"model_kwargs": {
- "model": "gpt-3.5-turbo",
+ "model": "gpt-3.5-turbo-0125",
+ "max_tokens": 2000,
+ "temperature": 0.0,
+ "top_p": 0.99,
+ "frequency_penalty": 0,
+ "presence_penalty": 0,
+ "stop": None,
+ },
+}
+
+gpt_3_1106_model = {
+ "model_client": OpenAIClient(input_type="text"),
+ "model_kwargs": {
+ "model": "gpt-3.5-turbo-1106",
"max_tokens": 2000,
"temperature": 0.0,
"top_p": 0.99,
@@ -38,10 +51,32 @@
},
}
+gpt_4_model = {
+ "model_client": OpenAIClient(),
+ "model_kwargs": {
+ "model": "gpt-4-turbo",
+ "temperature": 1,
+ "top_p": 0.99,
+ "max_tokens": 1000,
+ # "frequency_penalty": 1, # high for not repeating prompt
+ },
+}
+
gpt_4o_model = {
"model_client": OpenAIClient(),
"model_kwargs": {
- "model": "gpt-4o-mini",
+ "model": "gpt-4o", # gpt-4o-realtime-preview-2024-12-17
+ "temperature": 1,
+ "top_p": 0.99,
+ # "max_tokens": 1000,
+ # "frequency_penalty": 0.8, # high for not repeating prompt
+ },
+}
+
+gpt_4o1_model = {
+ "model_client": OpenAIClient(),
+ "model_kwargs": {
+ "model": "o1-preview",
"temperature": 1,
"top_p": 0.99,
"max_tokens": 1000,
diff --git a/use_cases/question_answering/bbh/data.py b/use_cases/question_answering/bbh/data.py
index d1fc3709..910b7e00 100644
--- a/use_cases/question_answering/bbh/data.py
+++ b/use_cases/question_answering/bbh/data.py
@@ -3,11 +3,11 @@
import re
from dataclasses import dataclass, field
-import adalflow as adal
from adalflow.core import DataClass
from adalflow.datasets.big_bench_hard import BigBenchHard
from adalflow.utils.data import subset_dataset
+from adalflow.core import func_to_parser
@dataclass
@@ -64,7 +64,7 @@ class QuestionAnswer(DataClass):
) # score can be used as weight for demo, weight = score (the higher the more likely to be sampled)
-@adal.fun_to_component
+@func_to_parser
def parse_integer_answer(answer: str):
"""A function that parses the last integer from a string using regular expressions."""
try:
@@ -81,7 +81,7 @@ def parse_integer_answer(answer: str):
return answer
-@adal.fun_to_component
+@func_to_parser
def extract_answer(answer: str) -> str:
try:
pattern = re.compile(r"Answer:\s*(.*)", re.DOTALL)
diff --git a/use_cases/question_answering/bbh/object_count/task.py b/use_cases/question_answering/bbh/object_count/task.py
index 6f5571f8..4892fe0f 100644
--- a/use_cases/question_answering/bbh/object_count/task.py
+++ b/use_cases/question_answering/bbh/object_count/task.py
@@ -37,12 +37,12 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
param_type=ParameterType.PROMPT,
instruction_to_optimizer="You can try to show examples to see if it helps.",
)
- few_shot_demos = adal.Parameter(
- data=None,
- role_desc="To provide few shot demos to the language model",
- requires_opt=False,
- param_type=ParameterType.DEMOS,
- )
+ # few_shot_demos = adal.Parameter(
+ # data=None,
+ # role_desc="To provide few shot demos to the language model",
+ # requires_opt=True,
+ # param_type=ParameterType.DEMOS,
+ # )
self.llm_counter = adal.Generator(
model_client=model_client,
@@ -50,19 +50,19 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
template=few_shot_template,
prompt_kwargs={
"system_prompt": system_prompt,
- "few_shot_demos": few_shot_demos,
+ # "few_shot_demos": few_shot_demos,
},
output_processors=parse_integer_answer,
use_cache=True,
)
- def call(
+ def bicall(
self, question: str, id: str = None
) -> Union[adal.GeneratorOutput, adal.Parameter]:
output = self.llm_counter(prompt_kwargs={"input_str": question}, id=id)
- print(f"output: {output}, training: {self.training}")
+ # print(f"output: {output}, training: {self.training}")
if self.training:
- if output.full_response.error and "429" in output.full_response.error:
+ if output.data.error and "429" in output.data.error:
raise ValueError("Rate limit exceeded")
else:
if output.error and "429" in output.error:
@@ -85,8 +85,9 @@ def test_object_count_task():
task_pipeline.train()
answer: adal.Parameter = task_pipeline(question, id="1")
print(answer)
- print(f"full_response: {answer.full_response}")
+ print(f"data: {answer.data}")
answer.draw_graph()
+ print(f"prompt_data: {answer.get_prompt_data()}")
if __name__ == "__main__":
diff --git a/use_cases/question_answering/bbh/object_count/train_new.py b/use_cases/question_answering/bbh/object_count/train_new.py
index 48309aa7..bb21ec3b 100644
--- a/use_cases/question_answering/bbh/object_count/train_new.py
+++ b/use_cases/question_answering/bbh/object_count/train_new.py
@@ -58,8 +58,8 @@ def prepare_loss(
eval_input=sample.answer,
requires_opt=False,
)
- pred.eval_input = pred.full_response.data
- return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}}
+ pred.eval_input = pred.data.data
+ return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}, "id": sample.id}
# TODO: make the train diagnose on the student model and the teacher model automatcally
@@ -95,6 +95,9 @@ def train_diagnose_teacher(
# You will answer a reasoning question. Think step by step and double-check each calculation you make. Pay close attention to any numerical quantities in the text, converting written numbers into their numerical equivalents. Additionally, re-verify your final answer before concluding. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.
# 0.98 val, 0.91 test
+from adalflow.core.generator import BackwardPassSetup
+
+
def train(
train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle
raw_shots: int = 0,
@@ -106,6 +109,9 @@ def train(
debug=False,
resume_from_ckpt=None,
exclude_input_fields_from_bootstrap_demos=False,
+ seed=None,
+ tg: bool = False,
+ max_proposals_per_step: int = 5,
):
adal_component = ObjectCountAdalComponent(
**gpt_3_model,
@@ -114,6 +120,13 @@ def train(
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
+ backward_pass_setup = None
+ if tg:
+ backward_pass_setup = BackwardPassSetup(
+ all_pred_at_once=False,
+ compute_grad_for_errors_only=False,
+ )
+
trainer = adal.Trainer(
train_batch_size=train_batch_size,
adaltask=adal_component,
@@ -123,43 +136,71 @@ def train(
raw_shots=raw_shots,
bootstrap_shots=bootstrap_shots,
debug=debug,
- weighted_sampling=True,
+ weighted_sampling=False,
optimization_order=optimization_order,
exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,
+ max_proposals_per_step=max_proposals_per_step,
)
+ trainer.set_random_seed(seed)
print(trainer)
train_dataset, val_dataset, test_dataset = load_datasets()
+ # train_dataset = train_dataset[:4]
+ # val_dataset = val_dataset[:4]
+ # test_dataset = test_dataset[:4]
+
ckpt, _ = trainer.fit(
train_dataset=train_dataset,
val_dataset=val_dataset,
test_dataset=test_dataset,
resume_from_ckpt=resume_from_ckpt,
+ backward_pass_setup=backward_pass_setup,
)
return ckpt
if __name__ == "__main__":
- import sys
import json
+ import random
+
+ random.seed(2025)
+ # np.random.seed(2025) # Set NumPy random seed
+
+ # make the strategy configurable in the script
+ import argparse
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--strategy", type=str, default="constrained")
+ parser.add_argument("--use_tg", action="store_true")
+ parser.add_argument("--max_proposals_per_step", type=int, default=5)
+ parser.add_argument(
+ "output_path", nargs="?", help="File path to save the checkpoint"
+ )
+
+ args = parser.parse_args()
+
+ set_strategy = args.strategy
+ set_output_path = args.output_path
+ use_tg = args.use_tg
+ max_proposals_per_step = args.max_proposals_per_step
+
ckpt = train(
- debug=False,
+ debug=True,
max_steps=12,
- strategy="constrained",
+ strategy=set_strategy,
exclude_input_fields_from_bootstrap_demos=True,
+ seed=2025, # pass the numpy seed
+ tg=use_tg,
+ max_proposals_per_step=max_proposals_per_step,
+ # resume_from_ckpt="/Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_dc778_run_1.json",
+ # resume_from_ckpt="/Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_18e8d_run_1.json",
)
print(f"ckpt: {ckpt}")
- # Save ckpt to a file passed as an argument
- if len(sys.argv) > 1: # Check if a file path is provided
- with open(sys.argv[1], "w") as f:
+ if set_output_path:
+ with open(set_output_path, "w") as f:
json.dump({"ckpt": ckpt}, f)
-
- # train_diagnose(**gpt_3_model)
- # train_diagnose_teacher(**gpt_4o_model) # 4omini works well as an optimizer too
- # /Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_49c63_run_1.json
- # 0.72 -> 0.9 val
- # 0.79 -> 0.92 test
- # 0.86->0.94 val, 0.79 -> 0.93 with only negative gradients /Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_7a649_run_1.json
-
- # without gradients -> 0.9 on tests
+ print(f"Checkpoint saved to {set_output_path}")
+ else:
+ print("No file path provided for saving the checkpoint.")
diff --git a/use_cases/question_answering/bbh/word_sorting/train.py b/use_cases/question_answering/bbh/word_sorting/train.py
index 12518206..c2a20da1 100644
--- a/use_cases/question_answering/bbh/word_sorting/train.py
+++ b/use_cases/question_answering/bbh/word_sorting/train.py
@@ -76,7 +76,7 @@ def prepare_loss(self, sample: Example, pred: adal.Parameter):
eval_input=sample.answer,
requires_opt=False,
)
- pred.eval_input = pred.full_response.data # processed
+ pred.eval_input = pred.data.data # processed
question_param = adal.Parameter(
name="question",
data=sample.question,
@@ -89,7 +89,8 @@ def prepare_loss(self, sample: Example, pred: adal.Parameter):
"pred_answer": pred,
"gt_answer": y_gt,
"question": question_param,
- }
+ },
+ "id": sample.id,
}
diff --git a/use_cases/text_grad_2.0_train.py b/use_cases/text_grad_2.0_train.py
index 37ff320d..90b9051e 100644
--- a/use_cases/text_grad_2.0_train.py
+++ b/use_cases/text_grad_2.0_train.py
@@ -1,21 +1,58 @@
import subprocess
import tempfile
import json
+import numpy as np
+import argparse
+num_runs = 4
# List of experiments to run
object_count = "use_cases/question_answering/bbh/object_count/train_new.py"
+trec_6_classification = "use_cases/classification/train.py"
hotpot_qa_multi_hop_rag = "benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py"
+hotpot_qa_multi_hop_rag_cycle = (
+ "benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag_cycle.py"
+)
+hotpot_qa_vanilla = "benchmarks/hotpot_qa/adal_exp/train_vanilla.py"
+hotpot_qa_vanilla_rag = "benchmarks/hotpot_qa/adal_exp/train_vanilla_rag.py"
+
ckpt_values = []
experiments = [
- object_count,
+ # object_count,
+ # trec_6_classification,
+ # hotpot_qa_vanilla_rag,
# hotpot_qa_multi_hop_rag,
+ hotpot_qa_multi_hop_rag_cycle,
]
+# set up the strategy for each experiment
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument("--strategy", type=str, default="constrained")
+argparser.add_argument("--use_tg", action="store_true")
+argparser.add_argument("--max_proposals_per_step", type=int, default=5)
+
+args = argparser.parse_args()
+
+strategy = args.strategy
+use_tg = args.use_tg
+max_proposals_per_step = args.max_proposals_per_step
+
# Optional: Arguments for each experiment (if needed)
+
+setup_str = f"--strategy {strategy}"
+
+if use_tg:
+ setup_str += " --use_tg"
+
+setup_str += f" --max_proposals_per_step {max_proposals_per_step}"
+
+
experiment_args = {
- object_count: "",
- # hotpot_qa_multi_hop_rag: "",
+ object_count: setup_str,
+ trec_6_classification: setup_str,
+ hotpot_qa_vanilla_rag: setup_str,
+ hotpot_qa_multi_hop_rag: setup_str,
}
ckpt_values = {}
@@ -47,12 +84,133 @@ def run_experiment(script, args):
if __name__ == "__main__":
+
+ result_file = "text_grad_2_results"
+ # add important run information in the naming of the file
+ import uuid
+
+ result_file = f"{result_file}_{num_runs}_runs_{uuid.uuid4()}.json"
+
for experiment in experiments:
args = experiment_args.get(experiment, "")
- ckpt = run_experiment(experiment, args)
- if ckpt:
- ckpt_values[experiment] = ckpt
+ for i in range(num_runs):
+ print(f"\nRun {i + 1}/{num_runs}")
+ ckpt = run_experiment(experiment, args)
+ ckpt_index = f"{experiment}_{i + 1}"
+ if ckpt:
+ ckpt_values[ckpt_index] = ckpt
+ # load all json files using the ckpt paths
+ highest_test_score, last_test_score, mean_test_score, standard_deviation = (
+ 0,
+ 0,
+ 0,
+ 0,
+ )
+ last_test_scores = []
+ highest_val_scores = []
+ total_passes = (
+ []
+ ) # each is the number of unique val scores in the highest val scores
+ total_prompts = [] # how many prompts tried in total
+
+ past_highest_val_scores = []
+ # # average pass rate, average pass prompts
+ # average_pass_rate_list = []
+ # average_pass_prompts_list = []
+ # average_total_prompts = []
+ # highest_test_score_json_file = None
+ total_steps = []
+ training_times = []
+ subset_pass_rate = []
+ valset_pass_rate = []
+ for experiment_index, ckpt in ckpt_values.items():
+ with open(ckpt, "r") as f:
+ data = json.load(f)
+ print(f"Experiment: {experiment_index}")
+ print(f"Data: {data}")
+ _high_val_score = max(data["val_scores"])
+ _unique_val_scores = len(set(data["val_scores"])) - 1
+ _last_test_score = data["test_score"]
+ # read the effective measures
+ effective_measures = data.get("effective_measure", {})
+
+ _total_prompts = effective_measures.get("subset", {}).get(
+ "pass", 0
+ ) + effective_measures.get("subset", {}).get("fail", 0)
+ if _total_prompts == 0:
+ _total_prompts = effective_measures.get("valset", {}).get(
+ "pass", 0
+ ) + effective_measures.get("valset", {}).get("fail", 0)
+ _total_steps = len(data["steps"]) - 1
+ _training_time = data.get("total_time", 0)
+ _subset_pass = effective_measures.get("subset", {}).get("pass", 0)
+ _subset_fail = effective_measures.get("subset", {}).get("fail", 0)
+ _valset_pass = effective_measures.get("valset", {}).get("pass", 0)
+ _valset_fail = effective_measures.get("valset", {}).get("fail", 0)
+ subset_pass_rate.append(_subset_pass / (_subset_pass + _subset_fail))
+ valset_pass_rate.append(_valset_pass / (_valset_pass + _valset_fail))
+ # save the results in the lists
+ past_highest_val_scores.append(_high_val_score)
+ total_passes.append(_unique_val_scores)
+ total_prompts.append(_total_prompts)
+ last_test_scores.append(_last_test_score)
+ total_steps.append(_total_steps)
+ training_times.append(_training_time)
+
+ # ensure all steps are the same
+ assert all(
+ [step == total_steps[0] for step in total_steps]
+ ), "All steps should be the same"
+
+ # compute the metrics
+ mean_test_score = np.mean(last_test_scores)
+ std_test_score = np.std(last_test_scores)
+
+ # val scores
+ mean_val_score = np.mean(past_highest_val_scores)
+ std_val_score = np.std(past_highest_val_scores)
+
+ # pass rate total_passes / steps
+ average_pass_rate = np.mean(total_passes) / total_steps[0]
+
+ # average total prompts
+ average_total_prompts = np.mean(total_prompts)
+
+ # average training time
+ average_training_time = np.mean(training_times)
+
+ # subset pass rate
+ average_subset_pass_rate = np.mean(subset_pass_rate)
+
+ # valset pass rate
+ average_valset_pass_rate = np.mean(valset_pass_rate)
+
+ # add these numbers in the ckpt_values
+ index = f"{experiment}_summary"
+ ckpt_values[index] = {
+ "config": {
+ "num_runs": num_runs,
+ "args": args,
+ },
+ "metrics": {
+ "mean_test_score": mean_test_score,
+ "std_test_score": std_test_score,
+ "mean_val_score": mean_val_score,
+ "std_val_score": std_val_score,
+ "average_pass_rate": average_pass_rate,
+ "average_total_prompts": average_total_prompts,
+ "average_training_time": average_training_time,
+ "average_subset_pass_rate": average_subset_pass_rate,
+ "average_valset_pass_rate": average_valset_pass_rate,
+ },
+ }
print("\nAll Checkpoints:")
for experiment, ckpt in ckpt_values.items():
print(f"{experiment}: {ckpt}")
+
+ # Save the results to a file
+ with open(result_file, "w") as f:
+ json.dump(ckpt_values, f, indent=4)
+
+ print(f"\nResults saved to {result_file}")