Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Step Metadata #26

Merged
merged 20 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/tape_improver.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from tapeagents.agent import Agent
from tapeagents.chain import Chain
from tapeagents.team import TeamTape
from tapeagents.core import (
Action,
AgentStep,
Expand All @@ -18,6 +17,7 @@
from tapeagents.llms import LLM, LiteLLM, LLMStream
from tapeagents.observe import observe_tape
from tapeagents.rendering import PrettyRenderer
from tapeagents.team import TeamTape
from tapeagents.utils import run_in_tmp_dir_to_make_test_data
from tapeagents.view import Call, Respond

Expand Down Expand Up @@ -122,7 +122,7 @@ def improver_tape_view(tape: Tape) -> str:
data.append(step.llm_dict())
data[-1]["index"] = index
if isinstance(step, AgentStep):
data[-1]["by"] = step.by
data[-1]["by"] = step._metadata.by
return json.dumps(data, indent=2)


Expand Down
10 changes: 5 additions & 5 deletions examples/workarena/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def start_task(
text=text,
current_page=self.browser.current_viewport,
total_pages=self.browser.n_viewports,
screenshot_path=screen,
)
obs._metadata.other["screenshot_path"] = screen
tape = WorkArenaTape(steps=[obs, WorkArenaTask(task=info["goal"])])
return tape, info

Expand Down Expand Up @@ -121,14 +121,15 @@ def react(self, tape: WorkArenaTape) -> WorkArenaTape:

def perform_browser_action(self, action: str) -> PageObservation:
text, screen, last_action_error, finished = self.browser.perform_action(action, self.baseline_obs)
return PageObservation(
obs = PageObservation(
text=text,
current_page=self.browser.current_viewport,
total_pages=self.browser.n_viewports,
screenshot_path=screen,
env_finished=finished,
last_action_error=last_action_error,
)
obs._metadata.other["screenshot_path"] = screen
obs._metadata.other["env_finished"] = finished
return obs

def goto_page(self, action: GotoPageAction) -> PageObservation:
return self.perform_browser_action(f"goto('{action.url}')")
Expand Down Expand Up @@ -156,7 +157,6 @@ def scroll(self, action: ScrollAction) -> PageObservation:
text=self.browser.scroll(action.direction),
current_page=self.browser.current_viewport,
total_pages=self.browser.n_viewports,
screenshot_path="",
)

def tab_focus(self, action: TabFocusAction) -> PageObservation:
Expand Down
5 changes: 0 additions & 5 deletions examples/workarena/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,8 @@ class PageObservation(WorkArenaObservation):
text: str
current_page: int
total_pages: int
screenshot_path: str = ""
env_finished: bool = False
last_action_error: str = ""

def llm_dict(self) -> dict[str, Any]:
return self.model_dump(exclude={"prompt_id", "screenshot_path", "env_finished"}, exclude_none=True)


class ReasoningThought(WorkArenaThought):
"""
Expand Down
7 changes: 6 additions & 1 deletion examples/workarena/tape_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,13 @@ def __init__(self, root_folder: str) -> None:
def render_step(self, step: Step | dict, folded: bool = True, **kwargs) -> str:
step_dict = step.model_dump() if isinstance(step, Step) else step
html = super().render_step(step, folded, **kwargs)
screenshot_path = None
if "screenshot_path" in step_dict:
screenshot_url = os.path.join("static", kwargs["tape_dir"], "screenshots", step_dict["screenshot_path"])
screenshot_path = step_dict["screenshot_path"]
if "screenshot_path" in step_dict.get("_metadata", {}).get("other", {}):
screenshot_path = step_dict["_metadata"]["other"]["screenshot_path"]
if screenshot_path:
screenshot_url = os.path.join("static", kwargs["tape_dir"], "screenshots", screenshot_path)
html = f"<div class='basic-renderer-box' style='background-color:#baffc9;'><div><img src='{screenshot_url}' style='max-width: 100%;'></div>{html}</div>"
return html

Expand Down
16 changes: 8 additions & 8 deletions tapeagents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def run_iteration(
llm_stream = self.llm.generate(prompt) if prompt else LLMStream(None, prompt)
for item in self.generate_steps(tape, llm_stream):
if isinstance(item, AgentStep):
item.prompt_id = llm_stream.prompt.id
item._metadata.prompt_id = llm_stream.prompt.id
yield item
new_steps.append(item)
else:
Expand All @@ -326,13 +326,13 @@ def _run_implementation():
stop = False
while n_iterations < self.max_iterations and not stop:
current_subagent = self.delegate(tape)
for item in current_subagent.run_iteration(tape):
if isinstance(item, PartialStep):
yield AgentEvent(partial_step=item)
elif isinstance(item, AgentStep):
item.by = current_subagent.full_name
tape = tape.append(item)
yield AgentEvent(step=item, partial_tape=tape)
for step in current_subagent.run_iteration(tape):
if isinstance(step, PartialStep):
yield AgentEvent(partial_step=step)
elif isinstance(step, AgentStep):
step._metadata.by = current_subagent.full_name
tape = tape.append(step)
yield AgentEvent(step=step, partial_tape=tape)
if self.should_stop(tape):
stop = True
else:
Expand Down
28 changes: 22 additions & 6 deletions tapeagents/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,24 @@ def completion_str(self) -> str:
return self.text[-self.n_predicted :]


class StepMetadata(BaseModel):
"""
Metadata for the step: identifiers and origin information stored next to the step's own fields.
"""

# Identifier; defaults to a freshly generated uuid4 rendered as a string.
id: str = Field(default_factory=lambda: str(uuid4()))
# ID of the prompt the step was generated from; empty string when not set.
prompt_id: str = ""
# Task that was being solved when the step was produced; empty string when not set.
task: str = ""
# Name of the agent that produced the step; empty string when not set.
# NOTE(review): looks like a "/"-separated full agent name — confirm against the code that sets it.
by: str = ""
# Free-form extra metadata; default_factory gives every instance its own empty dict.
other: dict[str, Any] = Field(default_factory=dict)


class Step(BaseModel):
_metadata: StepMetadata = StepMetadata()

def llm_dict(self) -> dict[str, Any]:
"""Dump step data only, drop the metadata"""
return self.model_dump(exclude_none=True)
return self.model_dump(exclude_none=True, exclude={"_metadata"})

def llm_view(self, indent: int = 2) -> str:
return json.dumps(self.llm_dict(), indent=indent, ensure_ascii=False)
Expand All @@ -51,12 +65,14 @@ class Error(Observation):


class AgentStep(Step):
prompt_id: str = ""
task: str = ""
by: str = ""
pass

def llm_dict(self) -> dict:
return self.model_dump(exclude={"prompt_id", "task", "by"}, exclude_none=True)
def task(self, task: str) -> Self:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this become `node`?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably. I want to discuss that with Dima one more time to make things clearer in my head :)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that this method mutates the object, so at the very least I'd call it `set_task`, but @jpt-sn's comment is more important.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed to `.by_node()`. Are you OK with that name?

"""
Set the task that is being solved when the step is produced
"""
self._metadata.task = task
return self


class Thought(AgentStep):
Expand Down
2 changes: 1 addition & 1 deletion tapeagents/guided_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,5 @@ def parse_completion(self, completion: str, prompt_id: str) -> Generator[Step, N
)
return
for step in steps:
step.prompt_id = prompt_id
step._metadata.prompt_id = prompt_id
yield step
1 change: 1 addition & 0 deletions tapeagents/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ def _implementation():
known_prompts = list(self.completions.keys())
closest, score = closest_prompt(prompt_key, known_prompts)
if score >= 0.7:
print("NEW\n", prompt_key, "\n")
logger.warning(f"Closest prompt score {score:.3f}:\n{diff_strings(prompt_key, closest)}")
raise FatalError("prompt not found")
yield LLMEvent(completion=LLMMessage(content=completion))
Expand Down
16 changes: 8 additions & 8 deletions tapeagents/rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
import re
from typing import Any, Type

from pydantic import BaseModel
import yaml
from pydantic import BaseModel

from .agent import Agent
from .team import CodeExecutionResult, ExecuteCode
from .observe import LLMCall, retrieve_tape_llm_calls
from .container_executor import CodeBlock
from .view import Call, Respond
from .core import Action, Episode, Observation, Prompt, Step, Tape, Thought
from .dialog_tape import (
AssistantStep,
DialogTape,
DialogContext,
DialogTape,
SystemStep,
ToolCalls,
ToolResult,
UserStep,
)
from .observe import LLMCall, retrieve_tape_llm_calls
from .team import CodeExecutionResult, ExecuteCode
from .view import Call, Respond


def render_dialog_plain_text(tape: DialogTape) -> str:
Expand Down Expand Up @@ -226,11 +226,11 @@ def render_step(self, step: Step, index: int, **kwargs):
class_ = "observation"
elif isinstance(step, Call):
role = ""
title = f"{step.by.split('/')[-1]} calls {step.agent_name}"
title = f"{step._metadata.by.split('/')[-1]} calls {step.agent_name}"
class_ = "call"
elif isinstance(step, Respond):
role = ""
parts = step.by.split("/")
parts = step._metadata.by.split("/")
title = f"{parts[-1]} responds to {parts[-2]}"
class_ = "return"
elif isinstance(step, Thought):
Expand Down Expand Up @@ -433,4 +433,4 @@ def render_subagents(agent, indent=4):

def render_tape_with_prompts(tape: Tape, renderer: BasicRenderer):
llm_calls = retrieve_tape_llm_calls(tape)
return renderer.style + renderer.render_tape(tape, llm_calls)
return renderer.style + renderer.render_tape(tape, llm_calls)
53 changes: 26 additions & 27 deletions tapeagents/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from tapeagents.agent import DEFAULT, Agent, AgentStep, Node
from tapeagents.autogen_prompts import SELECT_SPEAKER_MESSAGE_AFTER_TEMPLATE, SELECT_SPEAKER_MESSAGE_BEFORE_TEMPLATE
from tapeagents.container_executor import extract_code_blocks
from tapeagents.core import FinalStep, Jump, Pass, Prompt, Tape
from tapeagents.core import FinalStep, Jump, Pass, Prompt, StepMetadata, Tape
from tapeagents.environment import CodeExecutionResult, ExecuteCode
from tapeagents.llms import LLM, LLMStream
from tapeagents.view import Broadcast, Call, Respond, TapeViewStack
Expand Down Expand Up @@ -124,7 +124,7 @@ def create_chat_initiator(
},
llms={DEFAULT: llm} if llm else {},
subagents=[teammate],
flow=([ExecuteCodeNode()] if execute_code else []) + [CallNode(), TerminateOrRepeatNode()],
flow=([ExecuteCodeNode()] if execute_code else []) + [CallNode(), TerminateOrRepeatNode()], # type: ignore
max_calls=max_calls,
init_message=init_message,
)
Expand All @@ -139,19 +139,18 @@ def generate_steps(
view = ActiveTeamAgentView(agent, tape)
recipients = agent.get_subagent_names()
last = view.messages[-1]
from_ = last.by.split("/")[-1]
from_ = last._metadata.by.split("/")[-1]
match last:
case Call():
yield Broadcast(task=self.name, content=last.content, from_=from_, to=list(recipients))
yield Broadcast(content=last.content, from_=from_, to=list(recipients)).task(self.name)
case Respond():
recipients = [name for name in recipients if name != last.by.split("/")[-1]]
recipients = [name for name in recipients if name != last._metadata.by.split("/")[-1]]
yield Broadcast(
task=self.name,
content=view.messages[-1].content,
from_=from_,
to=list(recipients),
)
case Broadcast(task=self.name):
).task(self.name)
case Broadcast(_metadata=StepMetadata(task=self.name)):
pass
case _:
assert False
Expand All @@ -175,12 +174,12 @@ def generate_steps(
# if last node
(other,) = agent.subagents
if view.should_generate_message:
yield Call(task=self.name, agent_name=other.name, content=llm_stream.get_text())
yield Call(agent_name=other.name, content=llm_stream.get_text()).task(self.name)
elif view.exec_result:
yield Call(task=self.name, agent_name=other.name, content=_exec_result_message(agent, tape))
yield Call(agent_name=other.name, content=_exec_result_message(agent, tape)).task(self.name)
else:
assert agent.init_message and not view.messages
yield Call(task=self.name, agent_name=other.name, content=agent.init_message)
yield Call(agent_name=other.name, content=agent.init_message).task(self.name)


class SelectAndCallNode(Node):
Expand Down Expand Up @@ -208,21 +207,21 @@ def generate_steps(
callee_name = llm_stream.get_text()
# check if the callee is an existing subagent
_ = agent.find_subagent(callee_name)
yield Call(task=self.name, agent_name=callee_name)
yield Call(agent_name=callee_name).task(self.name)


class ExecuteCodeNode(Node):
name: str = "execute_code"

def generate_steps(self, agent: logging.Any, tape: Tape, llm_stream: LLMStream) -> Generator[AgentStep, None, None]:
def generate_steps(self, agent: TeamAgent, tape: Tape, llm_stream: LLMStream) -> Generator[AgentStep, None, None]:
assert not llm_stream
view = ActiveTeamAgentView(agent, tape)
if view.last_non_empty_message is None:
yield Pass(task=self.name)
yield Pass().task(self.name)
elif code := extract_code_blocks(view.last_non_empty_message.content):
yield ExecuteCode(task=self.name, code=code)
yield ExecuteCode(code=code).task(self.name)
else:
yield Pass(task=self.name)
yield Pass().task(self.name)


class RespondNode(Node):
Expand All @@ -241,15 +240,15 @@ def generate_steps(
) -> Generator[AgentStep, None, None]:
view = ActiveTeamAgentView(agent, tape)
if view.should_generate_message:
yield Respond(task=self.name, content=llm_stream.get_text())
yield Respond(content=llm_stream.get_text()).task(self.name)
elif view.exec_result:
yield Respond(task=self.name, content=_exec_result_message(agent, tape))
yield Respond(content=_exec_result_message(agent, tape)).task(self.name)
else:
logger.info(
f"Agent {agent.full_name} had to respond with an empty message."
f" You might want to optimize your orchestration logic."
)
yield Respond(task=self.name)
yield Respond().task(self.name)


class TerminateOrRepeatNode(Node):
Expand All @@ -261,9 +260,9 @@ def generate_steps(
assert not llm_stream
view = ActiveTeamAgentView(agent, tape)
if view.should_stop:
yield FinalStep(task=self.name, reason="Termination message received")
yield FinalStep(reason="Termination message received").task(self.name)
else:
yield Jump(task=self.name, next_node=0)
yield Jump(next_node=0).task(self.name)


class RespondOrRepeatNode(Node):
Expand All @@ -274,9 +273,9 @@ def generate_steps(
) -> Generator[AgentStep, None, None]:
view = ActiveTeamAgentView(agent, tape)
if view.should_stop:
yield Respond(task=self.name)
yield Respond().task(self.name)
else:
yield Jump(task=self.name, next_node=0)
yield Jump(next_node=0).task(self.name)


def _exec_result_message(agent: TeamAgent, tape: TeamTape) -> str:
Expand All @@ -297,7 +296,7 @@ def _llm_messages_from_tape(agent: TeamAgent, tape: TeamTape) -> list[dict[str,
match step:
# When we make the LLM messages, we use "kind" == "user" for messages
# originating from other agents, and "kind" == "assistant" for messages by this agent.
case Call() if step.by == agent.full_name:
case Call() if step._metadata.by == agent.full_name:
# I called someone
llm_messages.append({"role": "assistant", "content": step.content})
case Call():
Expand All @@ -309,15 +308,15 @@ def _llm_messages_from_tape(agent: TeamAgent, tape: TeamTape) -> list[dict[str,
{
"role": "user",
"content": step.content,
"name": step.by.split("/")[-1],
"name": step._metadata.by.split("/")[-1],
}
)
case Respond() if step.by == agent.full_name:
case Respond() if step._metadata.by == agent.full_name:
# I responded to someone
llm_messages.append({"role": "assistant", "content": step.content})
case Respond():
# someone responded to me
who_returned = step.by.split("/")[-1]
who_returned = step._metadata.by.split("/")[-1]
llm_messages.append({"role": "user", "content": step.content, "name": who_returned})
case Broadcast():
llm_messages.append({"role": "user", "content": step.content, "name": step.from_})
Expand Down
Loading
Loading