-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #192 from ServiceNow/dtremblay/add_docling
Use `docling` package with default PdfConverter
- Loading branch information
Showing
6 changed files
with
444 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Hydra configuration: an environment exposing one document-reading tool and an
# agent with a single node that uses it.
# Defaults list: this file's own values (_self_) plus the gpt4o option of the llm group.
defaults: | ||
- _self_ | ||
- llm: gpt4o | ||
|
||
# Environment: a ToolCollectionEnvironment whose only listed tool is DocumentReader.
environment: | ||
_target_: tapeagents.environment.ToolCollectionEnvironment | ||
tools: | ||
- _target_: tapeagents.tools.document_reader.DocumentReader | ||
# hydra.utils.get_class resolves the dotted `path` to the class object itself,
# so the value here is the PdfConverter class rather than an instance of it.
preferred_pdf_converter: | ||
_target_: hydra.utils.get_class | ||
path: tapeagents.tools.converters.PdfConverter | ||
|
||
# Agent definition; ${llm} interpolates the top-level llm entry.
agent: | ||
_target_: tapeagents.agent.Agent | ||
name: document_agent | ||
max_iterations: 2 | ||
llms: | ||
default: ${llm} | ||
# Prompt fragments. The node below pulls in system_prompt, format and allowed_steps
# through ${agent.templates.<name>}; allowed_tools and thought_format are not
# referenced anywhere in the visible part of this config.
templates: | ||
system_prompt: | | ||
You will help the user to extract information from files. | ||
Use as many relevant tools as possible to include more details and facts in your responses. | ||
allowed_tools: | | ||
You have access to the following tools: | ||
{tools_description} | ||
thought_format: | | ||
Important! Respond with the plain text, do not include any JSON or code. | ||
Do not output anything besides what I asked in this message. | ||
allowed_steps: | | ||
You have access to the following tools: | ||
{tools_description} | ||
You are allowed to produce ONLY steps with the following JSON schemas: | ||
{allowed_steps} | ||
Do not reproduce the schema when producing steps; use it as a reference. | ||
format: > | ||
Output only a single JSON dict. | ||
DO NOT OUTPUT ANYTHING BESIDES THE JSON! DO NOT PLACE ANY COMMENTS INSIDE THE JSON. | ||
It will break the system that processes the output. | ||
# Single node named "act"; its next_node refers back to "act".
nodes: | ||
- _target_: tapeagents.nodes.StandardNode | ||
name: act | ||
system_prompt: ${agent.templates.system_prompt} | ||
guidance: | | ||
You have access to tools to read and convert files that contain useful information. Never call the same tool twice. | ||
The first step should be to simply read the data in the file. | ||
The second step should be to return the data to the user. | ||
${agent.templates.format} | ||
steps_prompt: ${agent.templates.allowed_steps} | ||
steps: | ||
- tapeagents.dialog_tape.AssistantAnswer | ||
use_known_actions: true | ||
next_node: act |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import argparse | ||
|
||
from hydra import compose, initialize | ||
from omegaconf import DictConfig | ||
|
||
from tapeagents.dialog_tape import DialogTape, UserStep | ||
from tapeagents.orchestrator import get_agent_and_env_from_config, main_loop | ||
|
||
|
||
def main(cfg: DictConfig, path: str) -> None:
    """Run the configured agent on a single document and print what happens.

    The agent and environment are built from ``cfg``. The run starts from a
    dialog tape holding one user step that asks for the document at ``path``
    to be read, converted and returned. Every agent step and every
    observation produced by the loop is printed as indented JSON.

    Args:
        cfg: Configuration passed to ``get_agent_and_env_from_config``.
        path: Location of the document, embedded verbatim in the user request.
    """
    agent, env = get_agent_and_env_from_config(cfg)

    print("Run the agent!")

    # Seed tape: an empty dialog plus the single user request.
    request = UserStep(content=f"Read and convert the document at `{path}` and return its results to me")
    start_tape = DialogTape() + [request]

    for event in main_loop(agent, start_tape, env):
        # Agent side: only events that actually carry a step are shown.
        agent_event = event.agent_event
        if agent_event and agent_event.step:
            print(agent_event.step.model_dump_json(indent=2))

        # Environment side: checked independently of the agent event.
        observation = event.observation
        if observation:
            print(observation.model_dump_json(indent=2))
|
||
|
||
# Script entry point: read the document path from the command line, compose the
# "convert_document" Hydra config, and pass both to main().
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser() | ||
# --input-path / -i is declared required, so parsing fails when it is omitted.
parser.add_argument("--input-path", "-i", type=str, required=True, help="Document to convert") | ||
args = parser.parse_args() | ||
# Per the Hydra docs, a relative config_path is resolved against the directory
# of the calling file.
with initialize(version_base=None, config_path="../conf"): | ||
cfg = compose(config_name="convert_document") | ||
# NOTE(review): indentation was lost in this rendering, so it is unclear whether
# the call below sits inside the `with` block or after it — confirm in the original file.
main(cfg, path=args.input_path) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.