Skip to content

Commit

Permalink
Initial commit of FileSurfer in AgentChat/ext. WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
afourney committed Nov 25, 2024
1 parent 1b2d42d commit 3ffd56e
Show file tree
Hide file tree
Showing 8 changed files with 756 additions and 1 deletion.
1 change: 1 addition & 0 deletions python/packages/autogen-ext/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ langchain = ["langchain_core~= 0.3.3"]
azure = ["azure-core", "azure-identity"]
docker = ["docker~=7.0"]
openai = ["openai>=1.3"]
file-surfer = ["markitdown"]
web-surfer = [
"playwright>=1.48.0",
"pillow>=11.0.0",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ._openai_assistant_agent import OpenAIAssistantAgent
from .file_surfer._file_surfer import FileSurfer
from .web_surfer._multimodal_web_surfer import MultimodalWebSurfer

__all__ = ["MultimodalWebSurfer", "OpenAIAssistantAgent"]
__all__ = ["MultimodalWebSurfer", "OpenAIAssistantAgent", "FileSurfer"]
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import json
import time
import traceback
from typing import List, Sequence, Tuple

from autogen_agentchat.agents import BaseChatAgent
from autogen_agentchat.base import Response
from autogen_agentchat.messages import (
ChatMessage,
MultiModalMessage,
TextMessage,
)
from autogen_core.base import CancellationToken
from autogen_core.components import FunctionCall
from autogen_core.components.models import (
ChatCompletionClient,
LLMMessage,
SystemMessage,
UserMessage,
)

from ._markdown_file_browser import MarkdownFileBrowser

# from typing_extensions import Annotated
from ._tool_definitions import (
TOOL_FIND_NEXT,
TOOL_FIND_ON_PAGE_CTRL_F,
TOOL_OPEN_LOCAL_FILE,
TOOL_PAGE_DOWN,
TOOL_PAGE_UP,
)


class FileSurfer(BaseChatAgent):
    """An agent that uses a markdown-rendering file browser to read and
    navigate local files.

    The agent forwards its chat history to the model client together with a
    set of file-browser tools (open local file, page up/down, find on page,
    find next), executes whatever tool calls the model returns against the
    internal :class:`MarkdownFileBrowser`, and replies with the resulting
    browser viewport as text.
    """

    DEFAULT_DESCRIPTION = "An agent that can handle local files."

    # NOTE(review): these system messages are defined but never passed to the
    # model in _generate_reply — confirm whether they should be prepended to
    # the message list sent to the model client.
    DEFAULT_SYSTEM_MESSAGES = [
        SystemMessage("""
You are a helpful AI Assistant.
When given a user query, use available functions to help the user with their request."""),
    ]

    def __init__(
        self,
        name: str,
        model_client: ChatCompletionClient,
        description: str = DEFAULT_DESCRIPTION,
    ) -> None:
        """Create a FileSurfer.

        Args:
            name: Unique name of the agent.
            model_client: Chat-completion client used to decide which browser
                tool to invoke for each request.
            description: Human-readable description of the agent's role.
        """
        super().__init__(name, description)
        self._model_client = model_client
        self._chat_history: List[LLMMessage] = []
        # 5 KiB viewport keeps each rendered page small enough to fit
        # comfortably in the model's context window.
        self._browser = MarkdownFileBrowser(viewport_size=1024 * 5)

    @property
    def produced_message_types(self) -> List[type[ChatMessage]]:
        """Message types this agent can produce in a Response."""
        return [TextMessage]

    async def on_messages(self, messages: Sequence[ChatMessage], cancellation_token: CancellationToken) -> Response:
        """Record the incoming messages and generate a reply.

        Args:
            messages: New chat messages addressed to this agent.
            cancellation_token: Token used to cancel the model call.

        Returns:
            A Response whose chat_message is the generated reply, or a
            TextMessage describing the failure if reply generation raised.

        Raises:
            ValueError: If a message other than a TextMessage or
                MultiModalMessage is received.
        """
        for chat_message in messages:
            if isinstance(chat_message, TextMessage | MultiModalMessage):
                self._chat_history.append(UserMessage(content=chat_message.content, source=chat_message.source))
            else:
                raise ValueError(f"Unexpected message in FileSurfer: {chat_message}")
        try:
            _, content = await self._generate_reply(cancellation_token=cancellation_token)
            if isinstance(content, str):
                return Response(chat_message=TextMessage(content=content, source=self.name))
            else:
                return Response(chat_message=MultiModalMessage(content=content, source=self.name))
        except Exception:
            # Catch Exception rather than BaseException so that cancellation
            # and interpreter-exit signals (asyncio.CancelledError,
            # KeyboardInterrupt, SystemExit) propagate instead of being
            # reported as a file-surfing error.
            return Response(
                chat_message=TextMessage(content=f"File surfing error:\n\n{traceback.format_exc()}", source=self.name)
            )

    async def on_reset(self, cancellation_token: CancellationToken) -> None:
        """Clear the accumulated chat history."""
        self._chat_history.clear()

    def _get_browser_state(self) -> Tuple[str, str]:
        """Get the current state of the browser.

        Returns:
            A (header, viewport) tuple: the header describes the current
            address, page title, prior-visit recency, and viewport position;
            the viewport is the currently visible slice of the page.
        """
        header = f"Address: {self._browser.address}\n"

        if self._browser.page_title is not None:
            header += f"Title: {self._browser.page_title}\n"

        current_page = self._browser.viewport_current_page
        total_pages = len(self._browser.viewport_pages)

        # Walk history backwards, starting from the second-to-last entry so
        # the current visit itself is not reported as a "previous" visit.
        address = self._browser.address
        for i in range(len(self._browser.history) - 2, -1, -1):
            if self._browser.history[i][0] == address:
                header += f"You previously visited this page {round(time.time() - self._browser.history[i][1])} seconds ago.\n"
                break

        header += f"Viewport position: Showing page {current_page+1} of {total_pages}.\n"

        return (header, self._browser.viewport)

    async def _generate_reply(self, cancellation_token: CancellationToken) -> Tuple[bool, str]:
        """Ask the model what to do with the last user message and do it.

        Returns:
            A (request_halt, response) tuple. request_halt is currently
            always False; response is either the model's direct text reply,
            the browser state after executing the requested tool calls, or
            an error description.
        """
        history = self._chat_history[0:-1]
        last_message = self._chat_history[-1]
        assert isinstance(last_message, UserMessage)

        task_content = last_message.content  # The last message from the sender is the task.

        assert self._browser is not None

        context_message = UserMessage(
            source="user",
            content=f"Your browser is currently open to the page '{self._browser.page_title}' at the address '{self._browser.address}'.",
        )

        task_message = UserMessage(
            source="user",
            content=task_content,
        )

        create_result = await self._model_client.create(
            messages=history + [context_message, task_message],
            tools=[
                TOOL_OPEN_LOCAL_FILE,
                TOOL_PAGE_DOWN,
                TOOL_PAGE_UP,
                TOOL_FIND_NEXT,
                TOOL_FIND_ON_PAGE_CTRL_F,
            ],
            cancellation_token=cancellation_token,
        )

        response = create_result.content

        if isinstance(response, str):
            # The model answered directly without requesting any tool.
            return False, response

        elif isinstance(response, list) and all(isinstance(item, FunctionCall) for item in response):
            function_calls = response
            for function_call in function_calls:
                tool_name = function_call.name

                try:
                    arguments = json.loads(function_call.arguments)
                except json.JSONDecodeError as e:
                    error_str = f"File surfer encountered an error decoding JSON arguments: {e}"
                    return False, error_str

                if tool_name == "open_local_file":
                    path = arguments["path"]
                    self._browser.open_local_file(path)
                elif tool_name == "page_up":
                    self._browser.page_up()
                elif tool_name == "page_down":
                    self._browser.page_down()
                elif tool_name == "find_on_page_ctrl_f":
                    search_string = arguments["search_string"]
                    self._browser.find_on_page(search_string)
                elif tool_name == "find_next":
                    self._browser.find_next()
                else:
                    # Surface tool names we did not offer instead of silently
                    # ignoring the call.
                    return False, f"Unknown tool '{tool_name}' requested by the model."

            header, content = self._get_browser_state()
            final_response = header.strip() + "\n=======================\n" + content
            return False, final_response

        final_response = "TERMINATE"
        return False, final_response
Loading

0 comments on commit 3ffd56e

Please sign in to comment.