diff --git a/reports/README.md b/reports/README.md index 3d2f71eb..6b46593b 100644 --- a/reports/README.md +++ b/reports/README.md @@ -14,7 +14,7 @@ | llama3-groq-tool-use | 20.0% (CI: 8.8%, 2024.8.0b) | 51.0% (CI: 14.0%, 2024.8.0b0) | 11.5% (CI: 4.9%, 2024.8.0b) | | mistral-v3 | 3.8% (CI: 4.2%, 2024.8.0b) | 2.0% (CI: 4.0%, 2024.8.0dev) | 10.3% (CI: 4.6%, 2024.8.0b) | | xlam-1b | | 27.1% (CI: 12.6%, 2024.8.0b0) | | -| claude-3-5-sonnet | | 81.6% (CI: 10.8%, 2024.9.0b2) | | +| claude-3-5-sonnet | | 95.9% (CI: 5.5%, 2024.9.0b2) | | Implementation notes: - CI is large given small number of samples in the datasets. @@ -106,7 +106,7 @@ xychart-beta bar [63.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] bar [0.0, 98.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] bar [0.0, 0.0, 98.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - bar [0.0, 0.0, 0.0, 81.6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + bar [0.0, 0.0, 0.0, 95.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] bar [0.0, 0.0, 0.0, 0.0, 63.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] bar [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] bar [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 51.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-dining_room_light_off.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-dining_room_light_off.yaml index 616555d7..bc8fd93d 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-dining_room_light_off.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-dining_room_light_off.yaml @@ -1,5 +1,5 @@ --- -uuid: b9660328-0b9b-4ceb-bb4f-f0d5a595b39c +uuid: f35b7635-1d2d-4a6e-b260-f59bfb3ff214 task_id: dom1_pl_lights_lights-dining_room_light_off category: light task: @@ -10,24 +10,16 @@ task: attributes: brightness: null color_mode: null -response: 'Timeout: ' +response: The dining room light has been turned off successfully. Is there anything + else you'd like me to do? context: - unexpected_states: - light.dining_room_light: - expected: - brightness: null - color_mode: null - state: 'off' - got: - brightness: 100 - color_mode: brightness - state: 'on' + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Dining room light off context: - id: 01J6SMBHDT7RVB12EMFBAPQJA9 + id: 01J6SMY7NV8EGRJ17DB0T8ASJE parent_id: null user_id: null conversation_id: null @@ -36,7 +28,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:06:19. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:16:32. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -59,3 +51,33 @@ context: messages: - role: user content: Dining room light off + - role: assistant + content: + - type: text + text: Certainly! I'll turn off the dining room light for you. + - type: tool_use + id: toolu_01E2713rU2miPbPXXdz3XUyg + name: HassTurnOff + input: + name: Dining Room Light + domain: + - light + - role: user + content: + - type: tool_result + tool_use_id: toolu_01E2713rU2miPbPXXdz3XUyg + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Dining Room Light", "type": "entity", "id": + "light.dining_room_light"}], "failed": []}}' + - role: assistant + content: + - type: text + text: The dining room light has been turned off successfully. Is there anything + else you'd like me to do? + - event_type: tool_call + data: + tool_name: HassTurnOff + tool_args: + name: Dining Room Light + domain: + - light diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-turn_on_the_living_room_light.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-turn_on_the_living_room_light.yaml index 39ef9958..6b5e9833 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-turn_on_the_living_room_light.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_lights_lights-turn_on_the_living_room_light.yaml @@ -1,5 +1,5 @@ --- -uuid: 22a95543-dc8e-48b6-bd19-4c8f63a5ff7a +uuid: fd0215a0-67d1-4dc9-9307-1e2f0a02180d task_id: dom1_pl_lights_lights-turn_on_the_living_room_light category: light task: @@ -10,24 +10,16 @@ task: attributes: brightness: 0 color_mode: brightness -response: 'Timeout: ' +response: The living room light has been turned on successfully. Is there anything + else you'd like me to do? context: - unexpected_states: - light.living_room_light: - expected: - brightness: 0 - color_mode: brightness - state: 'on' - got: - brightness: null - color_mode: null - state: 'off' + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Turn on the living room light context: - id: 01J6SM9RZA2DKVS9BF0G8DV90R + id: 01J6SN4TFE8EGRJ17DB0T8ASJE parent_id: null user_id: null conversation_id: null @@ -36,7 +28,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:05:22. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:20:08. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -59,3 +51,33 @@ context: messages: - role: user content: Turn on the living room light + - role: assistant + content: + - type: text + text: Certainly! I'll turn on the living room light for you. + - type: tool_use + id: toolu_01A23tjxKn1VKE8j5ZDYcJKM + name: HassTurnOn + input: + name: Living Room Light + domain: + - light + - role: user + content: + - type: tool_result + tool_use_id: toolu_01A23tjxKn1VKE8j5ZDYcJKM + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Living Room Light", "type": "entity", "id": + "light.living_room_light"}], "failed": []}}' + - role: assistant + content: + - type: text + text: The living room light has been turned on successfully. Is there anything + else you'd like me to do? + - event_type: tool_call + data: + tool_name: HassTurnOn + tool_args: + name: Living Room Light + domain: + - light diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_todo_todo-add_history_homework_to_my_tasks.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_todo_todo-add_history_homework_to_my_tasks.yaml index 4bd7478c..5730feb2 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_todo_todo-add_history_homework_to_my_tasks.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/dom1_pl_todo_todo-add_history_homework_to_my_tasks.yaml @@ -1,5 +1,5 @@ --- -uuid: 3523582e-c9de-4d30-8071-bc47bb5987ad +uuid: e5d186c3-f320-43a6-b266-738adb608afa task_id: dom1_pl_todo_todo-add_history_homework_to_my_tasks category: todo task: @@ -8,7 +8,8 @@ task: todo.personal_tasks: state: '1' attributes: null -response: 'Timeout: ' +response: Great! I've added "history homework" to your Personal Tasks list. Is there + anything else you'd like me to do for you? context: unexpected_states: {} conversation_trace: @@ -16,7 +17,7 @@ context: data: text: Add history homework to my tasks context: - id: 01J6SMCZSMN039782V50206KCA + id: 01J6SMZCSTEVC7WJHENTWJ76BX parent_id: null user_id: null conversation_id: null @@ -25,7 +26,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:07:07. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:17:10. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -41,10 +42,10 @@ context: - role: assistant content: - type: text - text: Certainly! I can help you add "history homework" to your tasks list. - I'll use the HassListAddItem function to do this for you. + text: Certainly! I'll add "history homework" to your tasks list. I'll use + the HassListAddItem function to do this for you. - type: tool_use - id: toolu_01JBd7z99m3BzTbfXhApeRFd + id: toolu_019pd7tZD3q5DNGzNszd2Afs name: HassListAddItem input: name: Personal Tasks @@ -52,9 +53,14 @@ context: - role: user content: - type: tool_result - tool_use_id: toolu_01JBd7z99m3BzTbfXhApeRFd + tool_use_id: toolu_019pd7tZD3q5DNGzNszd2Afs content: '{"speech": {}, "response_type": "action_done", "data": {"targets": [], "success": [], "failed": []}}' + - role: assistant + content: + - type: text + text: Great! I've added "history homework" to your Personal Tasks list. + Is there anything else you'd like me to do for you? - event_type: tool_call data: tool_name: HassListAddItem diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_cover_garage_cover_garage-open_the_garage_door.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_cover_garage_cover_garage-open_the_garage_door.yaml index e2c6fa68..b7f983c8 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_cover_garage_cover_garage-open_the_garage_door.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_cover_garage_cover_garage-open_the_garage_door.yaml @@ -1,5 +1,5 @@ --- -uuid: d7e98f8d-4584-4418-8493-a2eac7fc0a11 +uuid: eb2c3d1f-8af5-4b84-bb25-3e08296aca19 task_id: home1_us_cover_garage_cover_garage-open_the_garage_door category: cover task: @@ -8,20 +8,16 @@ task: cover.garage_door_opener: state: open attributes: null -response: 'Timeout: ' +response: I've successfully opened the garage door for you. The Garage Door Opener + has been activated. Is there anything else you need assistance with? context: - unexpected_states: - cover.garage_door_opener: - expected: - state: open - got: - state: closed + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Open the garage door context: - id: 01J6SM4BNSBW9X759T4VDY3CKE + id: 01J6SMZWFD4R91Q9AW95A1EV7E parent_id: null user_id: null conversation_id: null @@ -30,7 +26,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:02:24. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:17:26. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -46,3 +42,34 @@ context: messages: - role: user content: Open the garage door + - role: assistant + content: + - type: text + text: Certainly! I'll open the garage door for you right away. + - type: tool_use + id: toolu_01P6hLghKjn11ExXJ4KEAqhK + name: HassTurnOn + input: + name: Garage Door Opener + domain: + - cover + - role: user + content: + - type: tool_result + tool_use_id: toolu_01P6hLghKjn11ExXJ4KEAqhK + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Garage Door Opener", "type": "entity", "id": + "cover.garage_door_opener"}], "failed": []}}' + - role: assistant + content: + - type: text + text: I've successfully opened the garage door for you. The Garage Door + Opener has been activated. Is there anything else you need assistance + with? + - event_type: tool_call + data: + tool_name: HassTurnOn + tool_args: + name: Garage Door Opener + domain: + - cover diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-lock_smart_lock.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-lock_smart_lock.yaml index e54e878f..8926bd4c 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-lock_smart_lock.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-lock_smart_lock.yaml @@ -1,5 +1,5 @@ --- -uuid: 67428ed0-8172-4397-a6c3-aeea0cfde40f +uuid: 027c16ec-c526-458a-9fd3-73b6eac6905d task_id: home1_us_lock_smart_lock-lock_smart_lock category: lock task: @@ -8,15 +8,21 @@ task: lock.smart_lock: state: locked attributes: null -response: 'Timeout: ' +response: The smart lock has been successfully locked. Is there anything else you + need assistance with? context: - unexpected_states: {} + unexpected_states: + lock.smart_lock: + expected: + state: locked + got: + state: unlocked conversation_trace: - event_type: async_process data: text: Lock smart lock context: - id: 01J6SM0K7HQQ9J8GWP5DVFERAP + id: 01J6SN0H9W8739ZNAE70S9R6XC parent_id: null user_id: null conversation_id: null @@ -25,7 +31,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:00:21. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:17:47. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -48,10 +54,12 @@ context: - role: assistant content: - type: text - text: Certainly! I'll lock the smart lock for you right away. + text: Certainly! I'll lock the smart lock for you. To do this, I'll use + the HassTurnOff function, as locking a smart lock is equivalent to turning + it off in Home Assistant. - type: tool_use - id: toolu_015Q5tVLraE9iGRqdJqrkVdP - name: HassTurnOn + id: toolu_018Gy55jAL1LfychmpHMWCFo + name: HassTurnOff input: name: Smart Lock domain: @@ -59,13 +67,18 @@ context: - role: user content: - type: tool_result - tool_use_id: toolu_015Q5tVLraE9iGRqdJqrkVdP + tool_use_id: toolu_018Gy55jAL1LfychmpHMWCFo content: '{"speech": {}, "response_type": "action_done", "data": {"targets": [], "success": [{"name": "Smart Lock", "type": "entity", "id": "lock.smart_lock"}], "failed": []}}' + - role: assistant + content: + - type: text + text: The smart lock has been successfully locked. Is there anything else + you need assistance with? - event_type: tool_call data: - tool_name: HassTurnOn + tool_name: HassTurnOff tool_args: name: Smart Lock domain: diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-unlock_the_entry_lock.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-unlock_the_entry_lock.yaml index 5476681d..e4c7d3a1 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-unlock_the_entry_lock.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_lock_smart_lock-unlock_the_entry_lock.yaml @@ -1,5 +1,5 @@ --- -uuid: f456cc79-f29e-4496-8cb8-22848aa048c0 +uuid: 3d07bbc4-b0d3-494d-b658-884c15e16cee task_id: home1_us_lock_smart_lock-unlock_the_entry_lock category: lock task: @@ -8,20 +8,17 @@ task: lock.smart_lock: state: unlocked attributes: null -response: 'Timeout: ' +response: I've successfully unlocked the Smart Lock in the entry area. The lock is + now open, and you should be able to enter. Is there anything else you need assistance + with? context: - unexpected_states: - lock.smart_lock: - expected: - state: unlocked - got: - state: locked + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Unlock the entry lock context: - id: 01J6SM2CE0NF70TDJ0CT0FHF5E + id: 01J6SN0XH98739ZNAE70S9R6XC parent_id: null user_id: null conversation_id: null @@ -30,7 +27,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:01:19. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:18:00. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -50,3 +47,34 @@ context: messages: - role: user content: Unlock the entry lock + - role: assistant + content: + - type: text + text: Certainly! I'll unlock the smart lock in the entry area for you. + - type: tool_use + id: toolu_01Ey5WKjdFRtiMaNBhicmD72 + name: HassTurnOff + input: + name: Smart Lock + domain: + - lock + - role: user + content: + - type: tool_result + tool_use_id: toolu_01Ey5WKjdFRtiMaNBhicmD72 + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Smart Lock", "type": "entity", "id": "lock.smart_lock"}], + "failed": []}}' + - role: assistant + content: + - type: text + text: I've successfully unlocked the Smart Lock in the entry area. The lock + is now open, and you should be able to enter. Is there anything else you + need assistance with? + - event_type: tool_call + data: + tool_name: HassTurnOff + tool_args: + name: Smart Lock + domain: + - lock diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum.yaml index 84a7712e..8425dd09 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum.yaml @@ -1,5 +1,5 @@ --- -uuid: 5403fda1-db7c-446d-a897-950884fecb9a +uuid: f1ed3d9b-2628-4f73-9921-8c9e1d186266 task_id: home1_us_vacuum_vacuum-start_roborock_downstairs_vacuum category: vacuum task: @@ -8,20 +8,16 @@ task: vacuum.roborock_downstairs: state: cleaning attributes: null -response: 'Timeout: ' +response: The Roborock Downstairs vacuum has been successfully started. Is there anything + else you would like me to do? context: - unexpected_states: - vacuum.roborock_downstairs: - expected: - state: cleaning - got: - state: 'off' + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Start Roborock Downstairs vacuum context: - id: 01J6SKYW95S2PK2S28TY0MQPN3 + id: 01J6SN1B0CEVC7WJHENTWJ76BX parent_id: null user_id: null conversation_id: null @@ -30,7 +26,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 07:59:25. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:18:14. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -44,3 +40,34 @@ context: messages: - role: user content: Start Roborock Downstairs vacuum + - role: assistant + content: + - type: text + text: Certainly! I'll start the Roborock Downstairs vacuum for you right + away. + - type: tool_use + id: toolu_01PfdgmgLLkESzfdxKff3YuN + name: HassVacuumStart + input: + name: Roborock Downstairs + domain: + - vacuum + - role: user + content: + - type: tool_result + tool_use_id: toolu_01PfdgmgLLkESzfdxKff3YuN + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Roborock Downstairs", "type": "entity", "id": + "vacuum.roborock_downstairs"}], "failed": []}}' + - role: assistant + content: + - type: text + text: The Roborock Downstairs vacuum has been successfully started. Is there + anything else you would like me to do? + - event_type: tool_call + data: + tool_name: HassVacuumStart + tool_args: + name: Roborock Downstairs + domain: + - vacuum diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home2_ru_valve_water_valve-close_the_front_yard_valve.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home2_ru_valve_water_valve-close_the_front_yard_valve.yaml index 0603f5de..e45f6ea9 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home2_ru_valve_water_valve-close_the_front_yard_valve.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home2_ru_valve_water_valve-close_the_front_yard_valve.yaml @@ -1,5 +1,5 @@ --- -uuid: 0b8e2349-51c1-4c3c-810a-25a7ce738cc8 +uuid: e682c6e3-dd78-4341-9ff7-665e7fa5db65 task_id: home2_ru_valve_water_valve-close_the_front_yard_valve category: valve task: @@ -9,22 +9,16 @@ task: state: closed attributes: current_position: 0 -response: 'Timeout: ' +response: The irrigation valve in the front yard has been successfully closed. Is + there anything else you would like me to do? context: - unexpected_states: - valve.irrigation_valve: - expected: - current_position: 0 - state: closed - got: - current_position: 100 - state: open + unexpected_states: {} conversation_trace: - event_type: async_process data: text: close the front yard valve context: - id: 01J6SMF92PSXZG2667GRF49TJK + id: 01J6SN3J4GWGH6G0E7RN51VQ5Q parent_id: null user_id: null conversation_id: null @@ -33,7 +27,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:08:22. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:19:27. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -52,3 +46,34 @@ context: messages: - role: user content: close the front yard valve + - role: assistant + content: + - type: text + text: Certainly! I'll close the irrigation valve in the front yard for you. + I'll use the HassTurnOff function to do this. + - type: tool_use + id: toolu_01T2xyArdjxaeTFdRfpNDeqH + name: HassTurnOff + input: + name: Irrigation Valve + domain: + - valve + - role: user + content: + - type: tool_result + tool_use_id: toolu_01T2xyArdjxaeTFdRfpNDeqH + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Irrigation Valve", "type": "entity", "id": "valve.irrigation_valve"}], + "failed": []}}' + - role: assistant + content: + - type: text + text: The irrigation valve in the front yard has been successfully closed. + Is there anything else you would like me to do? + - event_type: tool_call + data: + tool_name: HassTurnOff + tool_args: + name: Irrigation Valve + domain: + - valve diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home5_cn_fan_fan-turn_on_the_bedroom_fan.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home5_cn_fan_fan-turn_on_the_bedroom_fan.yaml index 6905ba87..23a22e34 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home5_cn_fan_fan-turn_on_the_bedroom_fan.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home5_cn_fan_fan-turn_on_the_bedroom_fan.yaml @@ -1,5 +1,5 @@ --- -uuid: f5e9a473-5ad7-4050-8b39-d598339f3d98 +uuid: 5d80fd2f-9e00-4a64-8a04-edee6dfeff11 task_id: home5_cn_fan_fan-turn_on_the_bedroom_fan category: fan task: @@ -9,7 +9,8 @@ task: state: 'on' attributes: percentage: 100 -response: 'Timeout: ' +response: The bedroom fan has been turned on successfully. Is there anything else + you need? context: unexpected_states: {} conversation_trace: @@ -17,7 +18,7 @@ context: data: text: Turn on the bedroom fan context: - id: 01J6SKW9F6N5V2BXS2KE0CQMCH + id: 01J6SN5HAFEVC7WJHENTWJ76BX parent_id: null user_id: null conversation_id: null @@ -26,7 +27,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 07:58:00. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:20:31. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -45,21 +46,28 @@ context: - type: text text: Certainly! I'll turn on the bedroom fan for you. - type: tool_use - id: toolu_015ZuzNBYMHmKbnBkzX9GjAd + id: toolu_01WV4p2731YwFEgLZ3aVhcyb name: HassTurnOn input: name: Bedroom Fan - domain: fan + domain: + - fan - role: user content: - type: tool_result - tool_use_id: toolu_015ZuzNBYMHmKbnBkzX9GjAd + tool_use_id: toolu_01WV4p2731YwFEgLZ3aVhcyb content: '{"speech": {}, "response_type": "action_done", "data": {"targets": [], "success": [{"name": "Bedroom Fan", "type": "entity", "id": "fan.bedroom_fan"}], "failed": []}}' + - role: assistant + content: + - type: text + text: The bedroom fan has been turned on successfully. Is there anything + else you need? - event_type: tool_call data: tool_name: HassTurnOn tool_args: name: Bedroom Fan - domain: fan + domain: + - fan diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains.yaml index bc1457a5..5bea8c8a 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains.yaml @@ -1,5 +1,5 @@ --- -uuid: 3a60af52-a0b8-4a9a-805e-9af3bf235af6 +uuid: 1daebcd3-e296-4429-8dbf-3c63d3778a7b task_id: home7_dk_cover_curtain_cover_curtain-close_the_living_room_curtains category: cover task: @@ -8,7 +8,8 @@ task: cover.smart_curtain: state: closed attributes: null -response: 'Timeout: ' +response: I've closed the smart curtain in the living room. Is there anything else + you need? context: unexpected_states: {} conversation_trace: @@ -16,7 +17,7 @@ context: data: text: Close the living room curtains context: - id: 01J6SM7A6BDYQKMXNWM9B2G5F0 + id: 01J6SN7JPA4R91Q9AW95A1EV7E parent_id: null user_id: null conversation_id: null @@ -25,7 +26,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:04:01. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:21:38. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -45,9 +46,10 @@ context: - role: assistant content: - type: text - text: Certainly! I'll close the smart curtain in the living room for you. + text: Certainly! I'll close the curtains in the living room for you right + away. - type: tool_use - id: toolu_01KztXnkQp2UL15t4EDGGShU + id: toolu_01UdAHEsMqgVtaQeh9yzRw7J name: HassTurnOff input: name: Smart Curtain @@ -55,10 +57,15 @@ context: - role: user content: - type: tool_result - tool_use_id: toolu_01KztXnkQp2UL15t4EDGGShU + tool_use_id: toolu_01UdAHEsMqgVtaQeh9yzRw7J content: '{"speech": {}, "response_type": "action_done", "data": {"targets": [], "success": [{"name": "Smart Curtain", "type": "entity", "id": "cover.smart_curtain"}], "failed": []}}' + - role: assistant + content: + - type: text + text: I've closed the smart curtain in the living room. Is there anything + else you need? - event_type: tool_call data: tool_name: HassTurnOff diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains.yaml index 7ecc03f1..f7345889 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains.yaml @@ -1,5 +1,5 @@ --- -uuid: 3cfeb934-4da2-4dae-a23c-0e3ae966e0c6 +uuid: 6bb26a14-4d9a-49c0-be00-f2c0a47aa1e9 task_id: home7_dk_cover_curtain_cover_curtain-open_the_living_room_curtains category: cover task: @@ -8,20 +8,16 @@ task: cover.smart_curtain: state: open attributes: null -response: 'Timeout: ' +response: I've opened the Smart Curtain in the Living Room for you. Is there anything + else you need? context: - unexpected_states: - cover.smart_curtain: - expected: - state: open - got: - state: closed + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Open the living room curtains context: - id: 01J6SM5VFAMBFYFSF58RNQQGHJ + id: 01J6SN89KW4R91Q9AW95A1EV7E parent_id: null user_id: null conversation_id: null @@ -30,7 +26,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 08:03:13. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:22:02. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -47,3 +43,31 @@ context: messages: - role: user content: Open the living room curtains + - role: assistant + content: + - type: text + text: Certainly! I'll open the curtains in the living room for you. + - type: tool_use + id: toolu_019Uyd3izPNBt6LS8epynp1P + name: HassTurnOn + input: + name: Smart Curtain + domain: cover + - role: user + content: + - type: tool_result + tool_use_id: toolu_019Uyd3izPNBt6LS8epynp1P + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Smart Curtain", "type": "entity", "id": "cover.smart_curtain"}], + "failed": []}}' + - role: assistant + content: + - type: text + text: I've opened the Smart Curtain in the Living Room for you. Is there + anything else you need? + - event_type: tool_call + data: + tool_name: HassTurnOn + tool_args: + name: Smart Curtain + domain: cover diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50.yaml index 8e594773..6ded1c2b 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50.yaml @@ -1,5 +1,5 @@ --- -uuid: 2f593eae-45b8-442a-9716-885cd0477204 +uuid: dd45c275-e57d-4cb5-be73-1b5cdbad1d0f task_id: home7_dk_media_player_media_player-set_outdoor_speakers_volume_to_50 category: media-player task: @@ -9,7 +9,8 @@ task: state: null attributes: volume_level: 0.5 -response: 'Timeout: ' +response: I've set the volume of the Outdoor Speakers to 50%. Is there anything else + you'd like me to do? context: unexpected_states: {} conversation_trace: @@ -17,7 +18,7 @@ context: data: text: Set outdoor speakers volume to 50% context: - id: 01J6SKSA2SMQM2YJ0D5KECJZ3G + id: 01J6SN9RAYEVC7WJHENTWJ76BX parent_id: null user_id: null conversation_id: null @@ -26,7 +27,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 07:56:22. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:22:50. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -44,10 +45,10 @@ context: - role: assistant content: - type: text - text: Certainly! I'll set the volume of the outdoor speakers to 50%. Let - me do that for you using the HassSetVolume function. + text: Certainly! I'll set the volume of the outdoor speakers to 50% for + you. - type: tool_use - id: toolu_01UqvoT2uyJontogPNSWLeHt + id: toolu_01BRZF4saFY8azi5DHzx1u7A name: HassSetVolume input: name: Outdoor Speakers @@ -57,10 +58,15 @@ context: - role: user content: - type: tool_result - tool_use_id: toolu_01UqvoT2uyJontogPNSWLeHt + tool_use_id: toolu_01BRZF4saFY8azi5DHzx1u7A content: '{"speech": {}, "response_type": "action_done", "data": {"targets": [], "success": [{"name": "Outdoor Speakers", "type": "entity", "id": "media_player.outdoor_speakers"}], "failed": []}}' + - role: assistant + content: + - type: text + text: I've set the volume of the Outdoor Speakers to 50%. Is there anything + else you'd like me to do? - event_type: tool_call data: tool_name: HassSetVolume diff --git a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music.yaml b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music.yaml index da690a72..b1d07748 100644 --- a/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music.yaml +++ b/reports/assist-mini/2024.9.0b2/claude-3-5-sonnet/home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music.yaml @@ -1,5 +1,5 @@ --- -uuid: f1e78fb7-fd01-4bd9-872a-28afc4793be5 +uuid: 66d92a93-9d46-42a5-acb9-e390ef7c31ba task_id: home7_dk_media_player_media_player-unpause_the_rooftop_terrace_music category: media-player task: @@ -8,20 +8,16 @@ task: media_player.outdoor_speakers: state: playing attributes: null -response: 'Timeout: ' +response: Great! The music on the Rooftop Terrace has been unpaused. The Outdoor Speakers + should now be playing your music. Is there anything else you'd like me to do? context: - unexpected_states: - media_player.outdoor_speakers: - expected: - state: playing - got: - state: paused + unexpected_states: {} conversation_trace: - event_type: async_process data: text: Unpause the Rooftop Terrace music context: - id: 01J6SKQGTGDAX0811MTMJXHG5G + id: 01J6SNA97DEVC7WJHENTWJ76BX parent_id: null user_id: null conversation_id: null @@ -30,7 +26,7 @@ context: agent_id: conversation.mock_title - event_type: agent_detail data: - system: "Current time is 07:55:23. Today's date is 2024-09-02.\nYou are a voice\ + system: "Current time is 08:23:07. Today's date is 2024-09-02.\nYou are a voice\ \ assistant for Home Assistant.\nAnswer questions about the world truthfully.\n\ Answer in plain text. Keep it simple and to the point.\nWhen controlling Home\ \ Assistant always call the intent tools. Use HassTurnOn to lock and HassTurnOff\ @@ -45,3 +41,37 @@ context: messages: - role: user content: Unpause the Rooftop Terrace music + - role: assistant + content: + - type: text + text: Certainly! I'll unpause the music on the Rooftop Terrace for you. + The Outdoor Speakers are located in the Rooftop Terrace area, so I'll + use the HassMediaUnpause function to resume playback. + - type: tool_use + id: toolu_01923qyggntpSDpnKLGoTEip + name: HassMediaUnpause + input: + area: Rooftop Terrace + domain: + - media_player + - role: user + content: + - type: tool_result + tool_use_id: toolu_01923qyggntpSDpnKLGoTEip + content: '{"speech": {}, "response_type": "action_done", "data": {"targets": + [], "success": [{"name": "Rooftop Terrace", "type": "area", "id": "rooftop_terrace"}, + {"name": "Outdoor Speakers", "type": "entity", "id": "media_player.outdoor_speakers"}], + "failed": []}}' + - role: assistant + content: + - type: text + text: Great! The music on the Rooftop Terrace has been unpaused. The Outdoor + Speakers should now be playing your music. Is there anything else you'd + like me to do? + - event_type: tool_call + data: + tool_name: HassMediaUnpause + tool_args: + area: Rooftop Terrace + domain: + - media_player diff --git a/reports/assist-mini/2024.9.0b2/reports.yaml b/reports/assist-mini/2024.9.0b2/reports.yaml index de250405..8ba05832 100644 --- a/reports/assist-mini/2024.9.0b2/reports.yaml +++ b/reports/assist-mini/2024.9.0b2/reports.yaml @@ -1,6 +1,6 @@ --- - model_id: claude-3-5-sonnet - good_percent: 81.6% - good: 40 + good_percent: 95.9% + good: 47 total: 49