Skip to content

Commit

Permalink
Update repro o1 o3 mini (#216)
Browse files Browse the repository at this point in the history
* add o1-mini + o3-mini configs

* Add o3-mini + o1-mini to repro journal

* remove duplicate o3-mini
  • Loading branch information
jardinetsouffleton authored Feb 12, 2025
1 parent 68fd4fd commit 4ccbf41
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 3 deletions.
18 changes: 18 additions & 0 deletions add_study_to_repro_journal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
from pathlib import Path
from agentlab.experiments.study import Study


# Root directory under which each study's result directory is stored.
base_dir = "/home/toolkit/ui_copilot_results"

# Study directories (relative to base_dir) to record in the journal.
exp_paths = [
    "2025-01-31_22-08-34_genericagent-o3-mini-2025-01-31-on-workarena-l1",
    # Disabled entry, kept for reference; it is neither loaded nor appended:
    # '2025-02-02_01-53-45_genericagent-openai-o1-mini-2024-09-12-on-workarena-l1',
    "2025-02-02_01-55-04_genericagent-openai-o1-mini-2024-09-12-on-workarena-l1",
]
full_paths = [os.path.join(base_dir, exp_path) for exp_path in exp_paths]

for full_path in full_paths:
    # Both statements belong to the loop body: every listed study is loaded
    # and then appended, not just the last one.
    study = Study.load(Path(full_path))

    # strict_reproducibility=False is passed through unchanged from the
    # original script.
    study.append_to_journal(strict_reproducibility=False)
10 changes: 9 additions & 1 deletion reproducibility_journal.csv
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,12 @@ ThibaultLSDC,GenericAgent-gpt-4o-mini_vision,visualwebarena,0.13.3,2024-12-02_02
ThibaultLSDC,GenericAgent-gpt-4o_vision,visualwebarena,0.13.3,2024-12-02_07-17-28,7fb7eac8-4bbd-4ebe-be32-15901a7678f2,0.267,0.015,65,910/910,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None,
ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta_vision,visualwebarena,0.13.3,2024-12-02_09-11-35,22f0611d-aeea-4ee9-a533-b45442b5e080,0.21,0.013,178,910/910,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None,
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,webarena,0.13.3,2024-12-02_23-18-38,fc5747bc-d998-4942-a0eb-e55a3ccc1cb3,0.184,0.014,213,811/812,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None,

Leo Boisvert,GenericAgent-o3-mini-2025-01-31,workarena_l1,0.4.1,2025-01-31_22-08-33,a74cc00f-f743-43a1-9cab-59af8bffa3a2,0.482,0.028,3,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.3.2,73baabee6d7ac37a5b8677c80baf83914a4f4dc4," M: src/agentlab/agents/generic_agent/__init__.py
M: src/agentlab/agents/generic_agent/agent_configs.py
M: src/agentlab/analyze/agent_xray.py
M: src/agentlab/llm/chat_api.py
M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29,"
Leo Boisvert,GenericAgent-openai_o1-mini-2024-09-12,workarena_l1,0.4.1,2025-02-02_01-55-04,f3e1fcb8-5fc5-4115-9e00-27251508e2c7,0.518,0.028,5,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.3.2,73baabee6d7ac37a5b8677c80baf83914a4f4dc4," M: src/agentlab/agents/generic_agent/__init__.py
M: src/agentlab/agents/generic_agent/agent_configs.py
M: src/agentlab/analyze/agent_xray.py
M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29,"
6 changes: 4 additions & 2 deletions src/agentlab/agents/generic_agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@
AGENT_4o_MINI,
AGENT_CLAUDE_SONNET_35,
AGENT_4o_VISION,
AGENT_4o_MINI_VISION,
AGENT_CLAUDE_SONNET_35_VISION,
AGENT_o3_MINI,
AGENT_o1_MINI,
)

__all__ = [
"AGENT_3_5",
"AGENT_4o",
"AGENT_4o_MINI",
"AGENT_4o_VISION",
"AGENT_o3_MINI",
"AGENT_o1_MINI",
"AGENT_LLAMA3_70B",
"AGENT_LLAMA31_70B",
"AGENT_8B",
Expand Down
9 changes: 9 additions & 0 deletions src/agentlab/agents/generic_agent/agent_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,15 @@
flags=FLAGS_GPT_4o,
)

# Generic agent configuration for o3-mini: chat model args are looked up under
# the "openai/o3-mini-2025-01-31" key and the GPT-4o flag set is reused as-is.
AGENT_o3_MINI = GenericAgentArgs(
chat_model_args=CHAT_MODEL_ARGS_DICT["openai/o3-mini-2025-01-31"],
flags=FLAGS_GPT_4o,
)

# Generic agent configuration for o1-mini: chat model args are looked up under
# the "openrouter/openai/o1-mini-2024-09-12" key, again with the GPT-4o flags.
# NOTE(review): this subscript is evaluated at import time; confirm that
# CHAT_MODEL_ARGS_DICT registers this exact "openrouter/..." key.
AGENT_o1_MINI = GenericAgentArgs(
chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/openai/o1-mini-2024-09-12"],
flags=FLAGS_GPT_4o,
)
# GPT-4o vision default config
FLAGS_GPT_4o_VISION = FLAGS_GPT_4o.copy()
FLAGS_GPT_4o_VISION.obs.use_screenshot = True
Expand Down
7 changes: 7 additions & 0 deletions src/agentlab/llm/llm_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@
max_input_tokens=16_384,
max_new_tokens=4096,
),
"openai/o1-mini": OpenAIModelArgs(
model_name="openai/o1-mini",
max_total_tokens=128_000,
max_input_tokens=128_000,
max_new_tokens=64_000,
temperature=1e-1,
),
"azure/gpt-35-turbo/gpt-35-turbo": AzureModelArgs(
model_name="gpt-35-turbo",
deployment_name="gpt-35-turbo",
Expand Down

0 comments on commit 4ccbf41

Please sign in to comment.