diff --git a/examples/docs/agent_advanced.py b/examples/docs/agent_advanced.py index 34b768b123..a3d4adb4ee 100644 --- a/examples/docs/agent_advanced.py +++ b/examples/docs/agent_advanced.py @@ -27,7 +27,7 @@ ), ], # LLM model & endpoint configuration - model="openai/gpt-4", + model="openai/gpt-4o-mini", context_window_limit=8000, # embedding model & endpoint configuration (cannot be changed) embedding="openai/text-embedding-ada-002", diff --git a/examples/docs/agent_basic.py b/examples/docs/agent_basic.py index 978d383298..3d2808101e 100644 --- a/examples/docs/agent_basic.py +++ b/examples/docs/agent_basic.py @@ -18,7 +18,7 @@ ), ], # set automatic defaults for LLM/embedding config - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ) print(f"Created agent with name {agent_state.name} and unique ID {agent_state.id}") diff --git a/examples/docs/node/example.ts b/examples/docs/node/example.ts new file mode 100644 index 0000000000..7a358900ec --- /dev/null +++ b/examples/docs/node/example.ts @@ -0,0 +1,138 @@ +import { LettaClient } from '@letta-ai/letta-client'; +import { + AssistantMessage, + ReasoningMessage, + ToolCallMessage, + ToolReturnMessage, +} from '@letta-ai/letta-client/api/types'; + +// Start letta server and run `npm run example` +const client = new LettaClient({ + baseUrl: 'http://localhost:8283', +}); + +const agent = await client.agents.create({ + memoryBlocks: [ + { + value: 'name: Caren', + label: 'human', + }, + ], + model: 'openai/gpt-4o-mini', + embedding: 'openai/text-embedding-ada-002', +}); + +console.log('Created agent with name', agent.name); + +let messageText = "What's my name?"; +let response = await client.agents.messages.create(agent.id, { + messages: [ + { + role: 'user', + content: messageText, + }, + ], +}); + +console.log(`Sent message to agent ${agent.name}: ${messageText}`); +console.log( + 'Agent thoughts:', + (response.messages[0] as ReasoningMessage).reasoning, +); +console.log( + 'Agent response:', + (response.messages[1] as AssistantMessage).content, +); + +const custom_tool_source_code = ` +def secret_message(): + """Return a secret message.""" + return "Hello world!" 
+ `.trim(); + +const tool = await client.tools.upsert({ + sourceCode: custom_tool_source_code, +}); + +await client.agents.tools.attach(agent.id, tool.id!); + +console.log( + `Created tool with name ${tool.name} and attached to agent ${agent.name}`, +); + +messageText = 'Run secret message tool and tell me what it returns'; +response = await client.agents.messages.create(agent.id, { + messages: [ + { + role: 'user', + content: messageText, + }, + ], +}); + +console.log('Sent message to agent:', messageText); +console.log( + 'Agent thoughts', + (response.messages[0] as ReasoningMessage).reasoning, +); +console.log( + 'Tool call information', + (response.messages[1] as ToolCallMessage).toolCall, +); +console.log( + 'Tool response information', + (response.messages[2] as ToolReturnMessage).status, +); +console.log( + 'Agent thoughts', + (response.messages[3] as ReasoningMessage).reasoning, +); +console.log( + 'Agent response:', + (response.messages[4] as AssistantMessage).content, +); + +let agentCopy = await client.agents.create({ + model: 'openai/gpt-4o-mini', + embedding: 'openai/text-embedding-ada-002', +}); +let block = await client.agents.coreMemory.retrieveBlock(agent.id, 'human'); +agentCopy = await client.agents.coreMemory.attachBlock(agentCopy.id, block.id!); + +console.log('Created agent copy with shared memory named', agentCopy.name); + +messageText = + "My name isn't Caren, it's Sarah. Please update your core memory with core_memory_replace"; +console.log(`Sent message to agent ${agentCopy.name}: ${messageText}`); + +response = await client.agents.messages.create(agentCopy.id, { + messages: [ + { + role: 'user', + content: messageText, + }, + ], +}); + +block = await client.agents.coreMemory.retrieveBlock(agentCopy.id, 'human'); +console.log(`New core memory for agent ${agentCopy.name}: ${block.value}`); + +messageText = "What's my name?"; +response = await client.agents.messages.create(agentCopy.id, { + messages: [ + { + role: 'user', + content: messageText, + }, + ], +}); + +console.log(`Sent message to agent ${agentCopy.name}: ${messageText}`); +console.log( + 'Agent thoughts:', + (response.messages[0] as ReasoningMessage).reasoning, +); +console.log( + 'Agent response:', + (response.messages[1] as AssistantMessage).content, +); diff --git a/examples/docs/node/package-lock.json b/examples/docs/node/package-lock.json new file mode 100644 index 0000000000..7724d0b21e --- /dev/null +++ b/examples/docs/node/package-lock.json @@ -0,0 +1,1314 @@ +{ + "name": "@letta-ai/core", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@letta-ai/core", + "version": "0.1.0", + "dependencies": { + "@babel/core": "^7.26.7", + "@babel/parser": "^7.26.7", + "@babel/template": "^7.25.9", + "@babel/traverse": "^7.26.7", + "@babel/types": "^7.26.7", + "@letta-ai/letta-client": "^0.1.17", + "csstype": "^3.1.3", + "undici-types": "^7.3.0" + }, + "devDependencies": { + "@types/node": "^22.12.0", + "ts-node": "^10.9.2", + "typescript": "^5.7.3" + } + }, + "node_modules/@ampproject/remapping": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", + "license": "Apache-2.0", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + 
"node_modules/@ampproject/remapping/node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.26.2", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz", + "integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==", + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.25.9", + "js-tokens": "^4.0.0", + "picocolors": "^1.0.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.26.5.tgz", + "integrity": "sha512-XvcZi1KWf88RVbF9wn8MN6tYFloU5qX8KjuF3E1PVBmJ9eypXfs4GRiJwLuTZL0iSnJUKn1BFPa5BPZZJyFzPg==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.7.tgz", + "integrity": "sha512-SRijHmF0PSPgLIBYlWnG0hyeJLwXE2CgpsXaMOrtt2yp9/86ALw6oUlj9KYuZ0JN07T4eBMVIW4li/9S1j2BGA==", + "license": "MIT", + "dependencies": { + "@ampproject/remapping": "^2.2.0", + "@babel/code-frame": "^7.26.2", + "@babel/generator": "^7.26.5", + "@babel/helper-compilation-targets": "^7.26.5", + "@babel/helper-module-transforms": "^7.26.0", + "@babel/helpers": "^7.26.7", + "@babel/parser": "^7.26.7", + "@babel/template": "^7.25.9", + "@babel/traverse": "^7.26.7", + "@babel/types": "^7.26.7", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.26.5.tgz", + "integrity": "sha512-2caSP6fN9I7HOe6nqhtft7V4g7/V/gfDsC3Ag4W7kEzzvRGKqiv0pu0HogPiZ3KaVSoNDhUws6IJjDjpfmYIXw==", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.26.5", + "@babel/types": "^7.26.5", + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/generator/node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.26.5", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.26.5.tgz", + "integrity": "sha512-IXuyn5EkouFJscIDuFF5EsiSolseme1s0CZB+QxVugqJLYmKdxI1VfIBOst0SUu4rnk2Z7kqTwmoO1lp3HIfnA==", + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.26.5", + "@babel/helper-validator-option": "^7.25.9", + "browserslist": "^4.24.0", + "lru-cache": 
"^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.25.9.tgz", + "integrity": "sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw==", + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.25.9", + "@babel/types": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.26.0.tgz", + "integrity": "sha512-xO+xu6B5K2czEnQye6BHA7DolFFmS3LB7stHZFaOLb1pAwO1HWLS8fXA+eh0A2yIvltPVmx3eNNDBJA2SLHXFw==", + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.25.9", + "@babel/helper-validator-identifier": "^7.25.9", + "@babel/traverse": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz", + "integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", + "integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.25.9.tgz", + "integrity": "sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.7.tgz", + "integrity": "sha512-8NHiL98vsi0mbPQmYAGWwfcFaOy4j2HY49fXJCfuDcdE7fMIsH9a7GdaeXpIBsbT7307WU8KCMp5pUVDNL4f9A==", + "license": "MIT", + "dependencies": { + "@babel/template": "^7.25.9", + "@babel/types": "^7.26.7" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.7.tgz", + "integrity": "sha512-kEvgGGgEjRUutvdVvZhbn/BxVt+5VSpwXz1j3WYXQbXDo8KzFOPNG2GQbdAiNq8g6wn1yKk7C/qrke03a84V+w==", + "license": "MIT", + "dependencies": { + "@babel/types": "^7.26.7" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/template": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.9.tgz", + "integrity": "sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.25.9", + "@babel/parser": "^7.25.9", + "@babel/types": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.26.7", + "resolved": 
"https://registry.npmjs.org/@babel/traverse/-/traverse-7.26.7.tgz", + "integrity": "sha512-1x1sgeyRLC3r5fQOM0/xtQKsYjyxmFjaOrLJNtZ81inNjyJHGIolTULPiSc/2qe1/qfpFLisLQYFnnZl7QoedA==", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.26.2", + "@babel/generator": "^7.26.5", + "@babel/parser": "^7.26.7", + "@babel/template": "^7.25.9", + "@babel/types": "^7.26.7", + "debug": "^4.3.1", + "globals": "^11.1.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.26.7", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.7.tgz", + "integrity": "sha512-t8kDRGrKXyp6+tjUh7hw2RLyclsW4TRoRvRHtSyAX9Bb5ldlFh+90YAYY6awRXrlB4G5G2izNeGySpATlFzmOg==", + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.25.9", + "@babel/helper-validator-identifier": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.8.tgz", + "integrity": "sha512-imAbBGkb+ebQyxKgzv5Hu2nmROxoDOXHh80evxdoXNOrvAnVx7zimzc1Oo5h9RlfV4vPXaE2iM5pOFbvOCClWA==", + "license": "MIT", + "dependencies": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/gen-mapping/node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@letta-ai/letta-client": { + "version": "0.1.17", + "resolved": "https://registry.npmjs.org/@letta-ai/letta-client/-/letta-client-0.1.17.tgz", + "integrity": "sha512-vS5S5g2cbpJM2AdYjtVUhOmabMGpBlmpHGyK+DUrbQ7hQH+/y9sWmycwvudZv9I4mDGLTOkPOLw/k0ogkEPkIQ==", + "dependencies": { + "dedent": "^1.0.0", + "form-data": "^4.0.0", + "form-data-encoder": "^4.0.2", + "formdata-node": "^6.0.3", + "node-fetch": "^2.7.0", + "qs": "^6.13.1", + "readable-stream": "^4.5.2", + "url-join": "4.0.1" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", + "integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.12.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.12.0.tgz", + "integrity": "sha512-Fll2FZ1riMjNmlmJOdAyY5pUbkftXslB5DgEzlIuNaiWhXd00FhWxVC/r4yV/4wBb9JfImTu+jiSvXTkJ7F/gA==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.20.0" + } + }, + "node_modules/@types/node/node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true, + "license": "MIT" + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/acorn": { + "version": "8.14.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", + "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.4", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", + "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + 
"node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/browserslist": { + "version": "4.24.4", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.4.tgz", + "integrity": "sha512-KDi1Ny1gSePi1vm0q4oxSF8b4DR44GF4BbmS2YdhPLOEqd8pDviZOGH/GsmRwoWJ2+5Lr085X7naowMwKHDG1A==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "caniuse-lite": "^1.0.30001688", + "electron-to-chromium": "^1.5.73", + "node-releases": "^2.0.19", + "update-browserslist-db": "^1.1.1" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.1.tgz", + "integrity": "sha512-BhYE+WDaywFg2TBWYNXAE+8B1ATnThNBqXHP5nQu0jWJdVvY2hvkpyB3qOmtmDePiS5/BDQ8wASEWGMWRG148g==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.3.tgz", + "integrity": "sha512-YTd+6wGlNlPxSuri7Y6X8tY2dmm12UMH66RpKMhiX6rsk5wXXnYgbUcOt8kiS31/AjfoTOvCsE+w8nZQLQnzHA==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "get-intrinsic": "^1.2.6" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001695", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001695.tgz", + "integrity": 
"sha512-vHyLade6wTgI2u1ec3WQBxv+2BrTERV28UXQu9LO6lZ9pYeMk34vjXFLOxo1A4UBA8XTL4njRQZdno/yYaSmWw==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "license": "MIT" + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/csstype": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", + "license": "MIT" + }, + "node_modules/debug": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/dedent": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", + "integrity": "sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==", + "license": "MIT", + "peerDependencies": { + "babel-plugin-macros": "^3.1.0" + }, + "peerDependenciesMeta": { + "babel-plugin-macros": { + "optional": true + } + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/electron-to-chromium": { + "version": 
"1.5.88", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.88.tgz", + "integrity": "sha512-K3C2qf1o+bGzbilTDCTBhTQcMS9KW60yTAaTeeXsfvQuTDDwlokLam/AdqlqcSy9u4UainDgsHV23ksXAOgamw==", + "license": "ISC" + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "license": "MIT", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/form-data": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz", + "integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data-encoder": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-4.0.2.tgz", + "integrity": "sha512-KQVhvhK8ZkWzxKxOr56CPulAhH3dobtuQ4+hNQ+HekH/Wp5gSOafqRAeTphQUJAIk0GBvHZgJ2ZGRWd5kphMuw==", + "license": "MIT", + "engines": { + "node": ">= 18" + } + }, + "node_modules/formdata-node": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-6.0.3.tgz", + "integrity": "sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==", + "license": "MIT", + "engines": { + "node": ">= 18" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": 
"https://github.com/sponsors/ljharb" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-intrinsic": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.7.tgz", + "integrity": "sha512-VW6Pxhsrk0KAOqs3WEd0klDiF/+V7gQOpAvY1jVU/LHmaD/kQO4523aiJuikX/QAKYiW6x8Jh+RJej1almdtCA==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "function-bind": "^1.1.2", + "get-proto": "^1.0.0", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/globals": { + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", + "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": 
"sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "license": "MIT" + }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "license": "MIT", + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "license": "ISC", + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-releases": { + "version": "2.0.19", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", + "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", + "license": "MIT" + }, + "node_modules/object-inspect": { + "version": "1.13.3", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.3.tgz", + "integrity": 
"sha512-kDCGIbxkDSXE3euJZZXzc6to7fCrKHNI/hSRQnRuQ+BWjFNzZwiFF8fj/6o2t2G9/jTj8PSIYTfCLelLZEeRpA==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "license": "ISC" + }, + "node_modules/process": { + "version": "0.11.10", + "resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz", + "integrity": "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==", + "license": "MIT", + "engines": { + "node": ">= 0.6.0" + } + }, + "node_modules/qs": { + "version": "6.14.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz", + "integrity": "sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/readable-stream": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", + "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", + "license": "MIT", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" 
+ } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/typescript": { + "version": "5.7.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz", + "integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.3.0.tgz", + "integrity": "sha512-z2pHpkN2BEJl3QlQo0GtfGCyuhuBbWX60vzGwyn7ex/seM2UkvyGEfEV0Qb9pXc5StNfcJpsstgaf2YTEJa63Q==", + "license": "MIT" + }, + 
"node_modules/update-browserslist-db": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.2.tgz", + "integrity": "sha512-PPypAm5qvlD7XMZC3BujecnaOxwhrtoFR+Dqkk5Aa/6DssiH0ibKoketaj9w8LP7Bont1rYeoV5plxD7RTEPRg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/url-join": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", + "integrity": "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==", + "license": "MIT" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "license": "ISC" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + } + } +} diff --git a/examples/docs/node/package.json b/examples/docs/node/package.json new file mode 100644 index 0000000000..30e9860029 --- /dev/null +++ b/examples/docs/node/package.json @@ -0,0 +1,18 @@ +{ + "name": "@letta-ai/core", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "example": "node --no-warnings --import 'data:text/javascript,import { register } from \"node:module\"; import { pathToFileURL } from \"node:url\"; register(\"ts-node/esm\", pathToFileURL(\"./\"));' example.ts", + "build": "tsc" + }, + "dependencies": { + "@letta-ai/letta-client": "^0.1.17" + }, + "devDependencies": { + "@types/node": "^22.12.0", + "ts-node": "^10.9.2", + "typescript": "^5.7.3" + } +} diff --git a/examples/docs/node/tsconfig.json b/examples/docs/node/tsconfig.json new file mode 100644 index 0000000000..5bbe072ecb --- /dev/null +++ b/examples/docs/node/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "es2017", + "module": "esnext", + "lib": ["es2017", 
"dom"], + "declaration": true, + "strict": true, + "moduleResolution": "node", + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "outDir": "./dist", + "rootDir": ".", + "resolveJsonModule": true + }, + "include": ["*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/examples/docs/tools.py b/examples/docs/tools.py index bc95913366..0f0a3086d9 100644 --- a/examples/docs/tools.py +++ b/examples/docs/tools.py @@ -45,7 +45,7 @@ def roll_d20() -> str: ), ], # set automatic defaults for LLM/embedding config - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", # create the agent with an additional tool tool_ids=[tool.id], @@ -88,7 +88,7 @@ def roll_d20() -> str: value="username: sarah", ), ], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", include_base_tools=False, tool_ids=[tool.id, send_message_tool], diff --git a/letta/__init__.py b/letta/__init__.py index 73c21a5c50..11f10a4154 100644 --- a/letta/__init__.py +++ b/letta/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.6.16" +__version__ = "0.6.17" # import clients diff --git a/letta/agent.py b/letta/agent.py index 9ff0f437ad..fefca2f55b 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -108,9 +108,6 @@ def __init__( if not isinstance(rule, TerminalToolRule): warnings.warn("Tool rules only work reliably for the latest OpenAI models that support structured outputs.") break - # add default rule for having send_message be a terminal tool - if agent_state.tool_rules is None: - agent_state.tool_rules = [] self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules) diff --git a/letta/client/client.py b/letta/client/client.py index 38c5143f4c..a2b62fa7cf 100644 --- a/letta/client/client.py +++ b/letta/client/client.py @@ -280,7 +280,7 @@ def get_archival_memory( def get_messages( self, agent_id: str, after: Optional[str] = None, before: Optional[str] = None, limit: Optional[int] = 1000 - ) -> List[Message]: + ) -> List[LettaMessage]: raise NotImplementedError def list_model_configs(self) -> List[LLMConfig]: @@ -812,7 +812,6 @@ def get_in_context_memory(self, agent_id: str) -> Memory: Returns: memory (Memory): In-context memory of the agent """ - response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/core-memory", headers=self.headers) if response.status_code != 200: raise ValueError(f"Failed to get in-context memory: {response.text}") @@ -966,7 +965,7 @@ def delete_archival_memory(self, agent_id: str, memory_id: str): def get_messages( self, agent_id: str, before: Optional[str] = None, after: Optional[str] = None, limit: Optional[int] = 1000 - ) -> List[Message]: + ) -> List[LettaMessage]: """ Get messages from an agent with pagination. @@ -984,7 +983,7 @@ def get_messages( response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/messages", params=params, headers=self.headers) if response.status_code != 200: raise ValueError(f"Failed to get messages: {response.text}") - return [Message(**message) for message in response.json()] + return [LettaMessage(**message) for message in response.json()] def send_message( self, @@ -3356,7 +3355,7 @@ def get_archival_memory( def get_messages( self, agent_id: str, before: Optional[str] = None, after: Optional[str] = None, limit: Optional[int] = 1000 - ) -> List[Message]: + ) -> List[LettaMessage]: """ Get messages from an agent with pagination. 
@@ -3378,6 +3377,7 @@ def get_messages( after=after, limit=limit, reverse=True, + return_message_object=False, ) def list_blocks(self, label: Optional[str] = None, templates_only: Optional[bool] = True) -> List[Block]: diff --git a/letta/client/streaming.py b/letta/client/streaming.py index f48c158ee9..b8c8aeb582 100644 --- a/letta/client/streaming.py +++ b/letta/client/streaming.py @@ -1,18 +1,22 @@ import json -from typing import Generator +from typing import Generator, Union, get_args import httpx from httpx_sse import SSEError, connect_sse +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING from letta.errors import LLMError +from letta.log import get_logger from letta.schemas.enums import MessageStreamStatus from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage from letta.schemas.letta_response import LettaStreamingResponse from letta.schemas.usage import LettaUsageStatistics +logger = get_logger(__name__) -def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingResponse, None, None]: + +def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStreamingResponse, ChatCompletionChunk], None, None]: with httpx.Client() as client: with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source: @@ -20,22 +24,26 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingRe # Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12) if not event_source.response.is_success: # handle errors - from letta.utils import printd + pass - printd("Caught error before iterating SSE request:", vars(event_source.response)) - printd(event_source.response.read()) + logger.warning("Caught error before iterating SSE request:", vars(event_source.response)) + logger.warning(event_source.response.read().decode("utf-8")) try: response_bytes = event_source.response.read() response_dict = json.loads(response_bytes.decode("utf-8")) - error_message = response_dict["error"]["message"] # e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions. 
- if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message: - raise LLMError(error_message) + if ( + "error" in response_dict + and "message" in response_dict["error"] + and OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in response_dict["error"]["message"] + ): + logger.error(response_dict["error"]["message"]) + raise LLMError(response_dict["error"]["message"]) except LLMError: raise except: - print(f"Failed to parse SSE message, throwing SSE HTTP error up the stack") + logger.error(f"Failed to parse SSE message, throwing SSE HTTP error up the stack") event_source.response.raise_for_status() try: @@ -58,33 +66,34 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingRe yield ToolReturnMessage(**chunk_data) elif "step_count" in chunk_data: yield LettaUsageStatistics(**chunk_data) + elif chunk_data.get("object") == get_args(ChatCompletionChunk.__annotations__["object"])[0]: + yield ChatCompletionChunk(**chunk_data) # Add your processing logic for chat chunks here else: raise ValueError(f"Unknown message type in chunk_data: {chunk_data}") except SSEError as e: - print("Caught an error while iterating the SSE stream:", str(e)) + logger.error("Caught an error while iterating the SSE stream:", str(e)) if "application/json" in str(e): # Check if the error is because of JSON response # TODO figure out a better way to catch the error other than re-trying with a POST response = client.post(url=url, json=data, headers=headers) # Make the request again to get the JSON response if response.headers["Content-Type"].startswith("application/json"): error_details = response.json() # Parse the JSON to get the error message - print("Request:", vars(response.request)) - print("POST Error:", error_details) - print("Original SSE Error:", str(e)) + logger.error("Request:", vars(response.request)) + logger.error("POST Error:", error_details) + logger.error("Original SSE Error:", str(e)) else: - print("Failed to retrieve JSON error message via retry.") + logger.error("Failed to retrieve JSON error message via retry.") else: - print("SSEError not related to 'application/json' content type.") + logger.error("SSEError not related to 'application/json' content type.") # Optionally re-raise the exception if you need to propagate it raise e except Exception as e: if event_source.response.request is not None: - print("HTTP Request:", vars(event_source.response.request)) + logger.error("HTTP Request:", vars(event_source.response.request)) if event_source.response is not None: - print("HTTP Status:", event_source.response.status_code) - print("HTTP Headers:", event_source.response.headers) - # print("HTTP Body:", event_source.response.text) - print("Exception message:", str(e)) + logger.error("HTTP Status:", event_source.response.status_code) + logger.error("HTTP Headers:", event_source.response.headers) + logger.error("Exception message:", str(e)) raise e diff --git a/letta/constants.py b/letta/constants.py index ee062cdac7..acaaca2cbd 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -50,7 +50,7 @@ # Base memory tools CAN be edited, and are added by default by the server BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"] # Multi agent tools -MULTI_AGENT_TOOLS = ["send_message_to_specific_agent", "send_message_to_agents_matching_all_tags"] +MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"] MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES = 3 MULTI_AGENT_SEND_MESSAGE_TIMEOUT = 20 * 60 diff --git 
a/letta/functions/function_sets/multi_agent.py b/letta/functions/function_sets/multi_agent.py index 40202ed9ed..a8641b2f7d 100644 --- a/letta/functions/function_sets/multi_agent.py +++ b/letta/functions/function_sets/multi_agent.py @@ -1,80 +1,86 @@ import asyncio -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, List from letta.constants import MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, MULTI_AGENT_SEND_MESSAGE_TIMEOUT -from letta.functions.helpers import async_send_message_with_retries -from letta.orm.errors import NoResultFound +from letta.functions.helpers import async_send_message_with_retries, execute_send_message_to_agent, fire_and_forget_send_to_agent +from letta.schemas.enums import MessageRole +from letta.schemas.message import MessageCreate from letta.server.rest_api.utils import get_letta_server if TYPE_CHECKING: from letta.agent import Agent -def send_message_to_specific_agent(self: "Agent", message: str, other_agent_id: str) -> Optional[str]: +def send_message_to_agent_and_wait_for_reply(self: "Agent", message: str, other_agent_id: str) -> str: """ - Send a message to a specific Letta agent within the same organization. + Sends a message to a specific Letta agent within the same organization and waits for a response. The sender's identity is automatically included, so no explicit introduction is needed in the message. This function is designed for two-way communication where a reply is expected. Args: - message (str): The message to be sent to the target Letta agent. - other_agent_id (str): The identifier of the target Letta agent. + message (str): The content of the message to be sent to the target agent. + other_agent_id (str): The unique identifier of the target Letta agent. Returns: - Optional[str]: The response from the Letta agent. It's possible that the agent does not respond. + str: The response from the target agent. """ - server = get_letta_server() + messages = [MessageCreate(role=MessageRole.user, content=message, name=self.agent_state.name)] + return execute_send_message_to_agent( + sender_agent=self, + messages=messages, + other_agent_id=other_agent_id, + log_prefix="[send_message_to_agent_and_wait_for_reply]", + ) - # Ensure the target agent is in the same org - try: - server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=self.user) - except NoResultFound: - raise ValueError( - f"The passed-in agent_id {other_agent_id} either does not exist, " - f"or does not belong to the same org ({self.user.organization_id})." - ) - - # Async logic to send a message with retries and timeout - async def async_send_single_agent(): - return await async_send_message_with_retries( - server=server, - sender_agent=self, - target_agent_id=other_agent_id, - message_text=message, - max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, # or your chosen constants - timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT, # e.g., 1200 for 20 min - logging_prefix="[send_message_to_specific_agent]", - ) - - # Run in the current event loop or create one if needed - try: - return asyncio.run(async_send_single_agent()) - except RuntimeError: - # e.g., in case there's already an active loop - loop = asyncio.get_event_loop() - if loop.is_running(): - return loop.run_until_complete(async_send_single_agent()) - else: - raise +def send_message_to_agent_async(self: "Agent", message: str, other_agent_id: str) -> str: + """ + Sends a message to a specific Letta agent within the same organization. 
The sender's identity is automatically included, so no explicit introduction is required in the message. This function does not expect a response from the target agent, making it suitable for notifications or one-way communication. -def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]: + Args: + message (str): The content of the message to be sent to the target agent. + other_agent_id (str): The unique identifier of the target Letta agent. + + Returns: + str: A confirmation message indicating the message was successfully sent. """ - Send a message to all agents in the same organization that match ALL of the given tags. + message = ( + f"[Incoming message from agent with ID '{self.agent_state.id}' - to reply to this message, " + f"make sure to use the 'send_message_to_agent_async' tool, or the agent will not receive your message] " + f"{message}" + ) + messages = [MessageCreate(role=MessageRole.system, content=message, name=self.agent_state.name)] + + # Do the actual fire-and-forget + fire_and_forget_send_to_agent( + sender_agent=self, + messages=messages, + other_agent_id=other_agent_id, + log_prefix="[send_message_to_agent_async]", + use_retries=False, # or True if you want to use async_send_message_with_retries + ) + + # Immediately return to caller + return "Successfully sent message" - Messages are sent in parallel for improved performance, with retries on flaky calls and timeouts for long-running requests. - This function does not use a cursor (pagination) and enforces a limit of 100 agents. + +def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]: + """ + Sends a message to all agents within the same organization that match all of the specified tags. Messages are dispatched in parallel for improved performance, with retries to handle transient issues and timeouts to ensure responsiveness. This function enforces a limit of 100 agents and does not support pagination (cursor-based queries). Each agent must match all specified tags (`match_all_tags=True`) to be included. Args: - message (str): The message to be sent to each matching agent. - tags (List[str]): The list of tags that each agent must have (match_all_tags=True). + message (str): The content of the message to be sent to each matching agent. + tags (List[str]): A list of tags that an agent must possess to receive the message. Returns: - List[str]: A list of responses from the agents that match all tags. - Each response corresponds to one agent. + List[str]: A list of responses from the agents that matched all tags. Each + response corresponds to a single agent. Agents that do not respond will not + have an entry in the returned list. 
""" + server = get_letta_server() # Retrieve agents that match ALL specified tags matching_agents = server.agent_manager.list_agents(actor=self.user, tags=tags, match_all_tags=True, limit=100) + messages = [MessageCreate(role=MessageRole.user, content=message, name=self.agent_state.name)] async def send_messages_to_all_agents(): tasks = [ @@ -82,7 +88,7 @@ async def send_messages_to_all_agents(): server=server, sender_agent=self, target_agent_id=agent_state.id, - message_text=message, + messages=messages, max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT, logging_prefix="[send_message_to_agents_matching_all_tags]", diff --git a/letta/functions/functions.py b/letta/functions/functions.py index 4195cbeec8..d5e9d08832 100644 --- a/letta/functions/functions.py +++ b/letta/functions/functions.py @@ -122,7 +122,6 @@ def get_json_schema_from_module(module_name: str, function_name: str) -> dict: generated_schema = generate_schema(attr) return generated_schema - except ModuleNotFoundError: raise ModuleNotFoundError(f"Module '{module_name}' not found.") except AttributeError: diff --git a/letta/functions/helpers.py b/letta/functions/helpers.py index 1718ffef87..244921199f 100644 --- a/letta/functions/helpers.py +++ b/letta/functions/helpers.py @@ -1,15 +1,25 @@ +import asyncio import json -from typing import Any, Optional, Union +import threading +from random import uniform +from typing import Any, List, Optional, Union import humps from composio.constants import DEFAULT_ENTITY_ID from pydantic import BaseModel -from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG -from letta.schemas.enums import MessageRole +from letta.constants import ( + COMPOSIO_ENTITY_ENV_VAR_KEY, + DEFAULT_MESSAGE_TOOL, + DEFAULT_MESSAGE_TOOL_KWARG, + MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, + MULTI_AGENT_SEND_MESSAGE_TIMEOUT, +) +from letta.orm.errors import NoResultFound from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage from letta.schemas.letta_response import LettaResponse from letta.schemas.message import MessageCreate +from letta.server.rest_api.utils import get_letta_server # TODO: This is kind of hacky, as this is used to search up the action later on composio's side @@ -259,16 +269,63 @@ def parse_letta_response_for_assistant_message( return None -import asyncio -from random import uniform -from typing import Optional +def execute_send_message_to_agent( + sender_agent: "Agent", + messages: List[MessageCreate], + other_agent_id: str, + log_prefix: str, +) -> Optional[str]: + """ + Helper function to send a message to a specific Letta agent. + + Args: + sender_agent ("Agent"): The sender agent object. + message (str): The message to send. + other_agent_id (str): The identifier of the target Letta agent. + log_prefix (str): Logging prefix for retries. + + Returns: + Optional[str]: The response from the Letta agent if required by the caller. + """ + server = get_letta_server() + + # Ensure the target agent is in the same org + try: + server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=sender_agent.user) + except NoResultFound: + raise ValueError( + f"The passed-in agent_id {other_agent_id} either does not exist, " + f"or does not belong to the same org ({sender_agent.user.organization_id})." 
+ ) + + # Async logic to send a message with retries and timeout + async def async_send(): + return await async_send_message_with_retries( + server=server, + sender_agent=sender_agent, + target_agent_id=other_agent_id, + messages=messages, + max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, + timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT, + logging_prefix=log_prefix, + ) + + # Run in the current event loop or create one if needed + try: + return asyncio.run(async_send()) + except RuntimeError: + loop = asyncio.get_event_loop() + if loop.is_running(): + return loop.run_until_complete(async_send()) + else: + raise async def async_send_message_with_retries( server, sender_agent: "Agent", target_agent_id: str, - message_text: str, + messages: List[MessageCreate], max_retries: int, timeout: int, logging_prefix: Optional[str] = None, @@ -290,7 +347,6 @@ async def async_send_message_with_retries( logging_prefix = logging_prefix or "[async_send_message_with_retries]" for attempt in range(1, max_retries + 1): try: - messages = [MessageCreate(role=MessageRole.user, content=message_text, name=sender_agent.agent_state.name)] # Wrap in a timeout response = await asyncio.wait_for( server.send_message_to_agent( @@ -334,4 +390,88 @@ async def async_send_message_with_retries( await asyncio.sleep(backoff) else: sender_agent.logger.error(f"{logging_prefix} - Fatal error during agent to agent send_message: {error_msg}") - return error_msg + raise Exception(error_msg) + + +def fire_and_forget_send_to_agent( + sender_agent: "Agent", + messages: List[MessageCreate], + other_agent_id: str, + log_prefix: str, + use_retries: bool = False, +) -> None: + """ + Fire-and-forget send of messages to a specific agent. + Returns immediately in the calling thread, never blocks. + + Args: + sender_agent (Agent): The sender agent object. + server: The Letta server instance + messages (List[MessageCreate]): The messages to send. + other_agent_id (str): The ID of the target agent. + log_prefix (str): Prefix for logging. + use_retries (bool): If True, uses async_send_message_with_retries; + if False, calls server.send_message_to_agent directly. + """ + server = get_letta_server() + + # 1) Validate the target agent (raises ValueError if not in same org) + try: + server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=sender_agent.user) + except NoResultFound: + raise ValueError( + f"The passed-in agent_id {other_agent_id} either does not exist, " + f"or does not belong to the same org ({sender_agent.user.organization_id})." 
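Taken together, the helpers above amount to "retry each send with a per-attempt timeout, and fan the sends out in parallel". A self-contained sketch of that combination, independent of Letta's server objects; the backoff schedule and the use of `return_exceptions` are assumptions, since most of the retry loop sits outside this hunk:

```python
import asyncio
from random import uniform
from typing import Awaitable, Callable, List, Optional, TypeVar

T = TypeVar("T")


async def retry_with_timeout(
    attempt: Callable[[], Awaitable[T]],
    max_retries: int = 3,
    timeout: float = 20 * 60,
) -> T:
    """Bound each attempt with a timeout and back off (with jitter) between failures."""
    last_error: Optional[Exception] = None
    for i in range(1, max_retries + 1):
        try:
            return await asyncio.wait_for(attempt(), timeout=timeout)
        except Exception as e:
            last_error = e
            if i < max_retries:
                await asyncio.sleep(min(2**i, 30) + uniform(0, 1))
    # mirrors the patch's change from returning the error string to raising
    raise Exception(f"all {max_retries} attempts failed: {last_error}")


async def fan_out(agent_ids: List[str], send: Callable[[str], Awaitable[str]]) -> List[str]:
    """Dispatch one retried send per agent and gather the replies concurrently."""
    tasks = [retry_with_timeout(lambda agent_id=agent_id: send(agent_id)) for agent_id in agent_ids]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    return [r for r in results if isinstance(r, str)]


async def _demo_send(agent_id: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for a network round trip to another agent
    return f"reply from {agent_id}"


print(asyncio.run(fan_out(["agent-a", "agent-b"], _demo_send)))
```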
+ ) + + # 2) Define the async coroutine to run + async def background_task(): + try: + if use_retries: + result = await async_send_message_with_retries( + server=server, + sender_agent=sender_agent, + target_agent_id=other_agent_id, + messages=messages, + max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, + timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT, + logging_prefix=log_prefix, + ) + sender_agent.logger.info(f"{log_prefix} fire-and-forget success with retries: {result}") + else: + # Direct call to server.send_message_to_agent, no retry logic + await server.send_message_to_agent( + agent_id=other_agent_id, + actor=sender_agent.user, + messages=messages, + stream_steps=False, + stream_tokens=False, + use_assistant_message=True, + assistant_message_tool_name=DEFAULT_MESSAGE_TOOL, + assistant_message_tool_kwarg=DEFAULT_MESSAGE_TOOL_KWARG, + ) + sender_agent.logger.info(f"{log_prefix} fire-and-forget success (no retries).") + except Exception as e: + sender_agent.logger.error(f"{log_prefix} fire-and-forget send failed: {e}") + + # 3) Helper to run the coroutine in a brand-new event loop in a separate thread + def run_in_background_thread(coro): + def runner(): + loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + loop.run_until_complete(coro) + finally: + loop.close() + + thread = threading.Thread(target=runner, daemon=True) + thread.start() + + # 4) Try to schedule the coroutine in an existing loop, else spawn a thread + try: + loop = asyncio.get_running_loop() + # If we get here, a loop is running; schedule the coroutine in background + loop.create_task(background_task()) + except RuntimeError: + # Means no event loop is running in this thread + run_in_background_thread(background_task()) diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index c6e8d63a66..fe198453f8 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -29,6 +29,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse from letta.settings import ModelSettings from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface +from letta.utils import run_async_task LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"] @@ -156,21 +157,25 @@ def create( assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance( stream_interface, AgentRefreshStreamingInterface ), type(stream_interface) - response = openai_chat_completions_process_stream( - url=llm_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions - api_key=model_settings.openai_api_key, - chat_completion_request=data, - stream_interface=stream_interface, + response = run_async_task( + openai_chat_completions_process_stream( + url=llm_config.model_endpoint, + api_key=model_settings.openai_api_key, + chat_completion_request=data, + stream_interface=stream_interface, + ) ) else: # Client did not request token streaming (expect a blocking backend response) data.stream = False if isinstance(stream_interface, AgentChunkStreamingInterface): stream_interface.stream_start() try: - response = openai_chat_completions_request( - url=llm_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions - api_key=model_settings.openai_api_key, - chat_completion_request=data, + response = run_async_task( + openai_chat_completions_request( + url=llm_config.model_endpoint, + api_key=model_settings.openai_api_key, + 
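Below, `llm_api_tools.py` starts funneling the now-async OpenAI helpers through `run_async_task`, which is imported from `letta.utils` but whose body is not part of this patch. One reasonable shape for such a sync-to-async bridge (an assumption; the real implementation may differ):

```python
import asyncio
import concurrent.futures
from typing import Any, Coroutine, TypeVar

T = TypeVar("T")


def run_async_task(coro: Coroutine[Any, Any, T]) -> T:
    """Run a coroutine to completion from synchronous code, whether or not a loop is already running."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: asyncio.run is safe
        return asyncio.run(coro)
    # A loop is already running here, so block on a fresh loop in a worker thread instead
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


# Example: wrap an async helper so legacy synchronous call sites keep working
async def _fake_request() -> str:
    await asyncio.sleep(0.05)
    return "ok"


print(run_async_task(_fake_request()))
```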
chat_completion_request=data, + ) ) finally: if isinstance(stream_interface, AgentChunkStreamingInterface): @@ -344,9 +349,12 @@ def create( stream_interface.stream_start() try: # groq uses the openai chat completions API, so this component should be reusable - response = openai_chat_completions_request( - api_key=model_settings.groq_api_key, - chat_completion_request=data, + response = run_async_task( + openai_chat_completions_request( + url=llm_config.model_endpoint, + api_key=model_settings.groq_api_key, + chat_completion_request=data, + ) ) finally: if isinstance(stream_interface, AgentChunkStreamingInterface): diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py index ca0c25f211..d931e8fb7e 100644 --- a/letta/llm_api/openai.py +++ b/letta/llm_api/openai.py @@ -1,8 +1,8 @@ import warnings -from typing import Generator, List, Optional, Union +from typing import AsyncGenerator, List, Optional, Union import requests -from openai import OpenAI +from openai import AsyncOpenAI from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST @@ -158,7 +158,7 @@ def build_openai_chat_completions_request( return data -def openai_chat_completions_process_stream( +async def openai_chat_completions_process_stream( url: str, api_key: str, chat_completion_request: ChatCompletionRequest, @@ -229,9 +229,10 @@ def openai_chat_completions_process_stream( stream_interface.stream_start() n_chunks = 0 # approx == n_tokens + chunk_idx = 0 try: - for chunk_idx, chat_completion_chunk in enumerate( - openai_chat_completions_request_stream(url=url, api_key=api_key, chat_completion_request=chat_completion_request) + async for chat_completion_chunk in openai_chat_completions_request_stream( + url=url, api_key=api_key, chat_completion_request=chat_completion_request ): assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk) @@ -348,6 +349,7 @@ def openai_chat_completions_process_stream( # increment chunk counter n_chunks += 1 + chunk_idx += 1 except Exception as e: if stream_interface: @@ -380,24 +382,24 @@ def openai_chat_completions_process_stream( return chat_completion_response -def openai_chat_completions_request_stream( +async def openai_chat_completions_request_stream( url: str, api_key: str, chat_completion_request: ChatCompletionRequest, -) -> Generator[ChatCompletionChunkResponse, None, None]: +) -> AsyncGenerator[ChatCompletionChunkResponse, None]: data = prepare_openai_payload(chat_completion_request) data["stream"] = True - client = OpenAI( + client = AsyncOpenAI( api_key=api_key, base_url=url, ) - stream = client.chat.completions.create(**data) - for chunk in stream: + stream = await client.chat.completions.create(**data) + async for chunk in stream: # TODO: Use the native OpenAI objects here? 
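The OpenAI helpers in `letta/llm_api/openai.py` are being converted from the synchronous `OpenAI` client to `AsyncOpenAI`. Outside of Letta, the same request/stream pattern looks like the sketch below; the model name and prompts are placeholders:

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

    # Blocking-style request: await the completion directly
    completion = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hello"}],
    )
    print(completion.choices[0].message.content)

    # Streaming request: awaiting create() returns an async iterator of chunks
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Count to three"}],
        stream=True,
    )
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())
```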
yield ChatCompletionChunkResponse(**chunk.model_dump(exclude_none=True)) -def openai_chat_completions_request( +async def openai_chat_completions_request( url: str, api_key: str, chat_completion_request: ChatCompletionRequest, @@ -410,8 +412,8 @@ def openai_chat_completions_request( https://platform.openai.com/docs/guides/text-generation?lang=curl """ data = prepare_openai_payload(chat_completion_request) - client = OpenAI(api_key=api_key, base_url=url) - chat_completion = client.chat.completions.create(**data) + client = AsyncOpenAI(api_key=api_key, base_url=url) + chat_completion = await client.chat.completions.create(**data) return ChatCompletionResponse(**chat_completion.model_dump()) diff --git a/letta/orm/agent.py b/letta/orm/agent.py index 781ab3836c..515f77c2e9 100644 --- a/letta/orm/agent.py +++ b/letta/orm/agent.py @@ -4,6 +4,7 @@ from sqlalchemy import JSON, Index, String from sqlalchemy.orm import Mapped, mapped_column, relationship +from letta.constants import MULTI_AGENT_TOOLS from letta.orm.block import Block from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ToolRulesColumn from letta.orm.message import Message @@ -15,7 +16,7 @@ from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.llm_config import LLMConfig from letta.schemas.memory import Memory -from letta.schemas.tool_rule import ToolRule +from letta.schemas.tool_rule import TerminalToolRule, ToolRule if TYPE_CHECKING: from letta.orm.agents_tags import AgentsTags @@ -114,6 +115,16 @@ class Agent(SqlalchemyBase, OrganizationMixin): def to_pydantic(self) -> PydanticAgentState: """converts to the basic pydantic model counterpart""" + # add default rule for having send_message be a terminal tool + tool_rules = self.tool_rules + if not tool_rules: + tool_rules = [ + TerminalToolRule(tool_name="send_message"), + ] + + for tool_name in MULTI_AGENT_TOOLS: + tool_rules.append(TerminalToolRule(tool_name=tool_name)) + state = { "id": self.id, "organization_id": self.organization_id, @@ -123,7 +134,7 @@ def to_pydantic(self) -> PydanticAgentState: "tools": self.tools, "sources": [source.to_pydantic() for source in self.sources], "tags": [t.tag for t in self.tags], - "tool_rules": self.tool_rules, + "tool_rules": tool_rules, "system": self.system, "agent_type": self.agent_type, "llm_config": self.llm_config, @@ -136,4 +147,5 @@ def to_pydantic(self) -> PydanticAgentState: "updated_at": self.updated_at, "tool_exec_environment_variables": self.tool_exec_environment_variables, } + return self.__pydantic_model__(**state) diff --git a/letta/orm/job.py b/letta/orm/job.py index a99b542c67..5890336545 100644 --- a/letta/orm/job.py +++ b/letta/orm/job.py @@ -9,7 +9,7 @@ from letta.orm.sqlalchemy_base import SqlalchemyBase from letta.schemas.enums import JobStatus from letta.schemas.job import Job as PydanticJob -from letta.schemas.letta_request import LettaRequestConfig +from letta.schemas.job import LettaRequestConfig if TYPE_CHECKING: from letta.orm.job_messages import JobMessage diff --git a/letta/orm/sqlalchemy_base.py b/letta/orm/sqlalchemy_base.py index 375417f89a..8cdd686a55 100644 --- a/letta/orm/sqlalchemy_base.py +++ b/letta/orm/sqlalchemy_base.py @@ -1,6 +1,7 @@ from datetime import datetime from enum import Enum from functools import wraps +from pprint import pformat from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union from sqlalchemy import String, and_, func, or_, select @@ -504,7 +505,14 @@ def to_pydantic(self) -> "BaseModel": model.metadata = 
self.metadata_ return model - def to_record(self) -> "BaseModel": - """Deprecated accessor for to_pydantic""" - logger.warning("to_record is deprecated, use to_pydantic instead.") - return self.to_pydantic() + def pretty_print_columns(self) -> str: + """ + Pretty prints all columns of the current SQLAlchemy object along with their values. + """ + if not hasattr(self, "__table__") or not hasattr(self.__table__, "columns"): + raise NotImplementedError("This object does not have a '__table__.columns' attribute.") + + # Iterate over the columns correctly + column_data = {column.name: getattr(self, column.name, None) for column in self.__table__.columns} + + return pformat(column_data, indent=4, sort_dicts=True) diff --git a/letta/schemas/job.py b/letta/schemas/job.py index 35ea9cd736..3d5c3b2c05 100644 --- a/letta/schemas/job.py +++ b/letta/schemas/job.py @@ -1,8 +1,9 @@ from datetime import datetime from typing import Optional -from pydantic import Field +from pydantic import BaseModel, Field +from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG from letta.orm.enums import JobType from letta.schemas.enums import JobStatus from letta.schemas.letta_base import OrmMetadataBase @@ -38,3 +39,18 @@ class JobUpdate(JobBase): class Config: extra = "ignore" # Ignores extra fields + + +class LettaRequestConfig(BaseModel): + use_assistant_message: bool = Field( + default=True, + description="Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects.", + ) + assistant_message_tool_name: str = Field( + default=DEFAULT_MESSAGE_TOOL, + description="The name of the designated message tool.", + ) + assistant_message_tool_kwarg: str = Field( + default=DEFAULT_MESSAGE_TOOL_KWARG, + description="The name of the message argument in the designated message tool.", + ) diff --git a/letta/schemas/letta_request.py b/letta/schemas/letta_request.py index 663dba14a8..2547fe680f 100644 --- a/letta/schemas/letta_request.py +++ b/letta/schemas/letta_request.py @@ -6,8 +6,8 @@ from letta.schemas.message import MessageCreate -class LettaRequestConfig(BaseModel): - # Flags to support the use of AssistantMessage message types +class LettaRequest(BaseModel): + messages: List[MessageCreate] = Field(..., description="The messages to be sent to the agent.") use_assistant_message: bool = Field( default=True, description="Whether the server should parse specific tool call arguments (default `send_message`) as `AssistantMessage` objects.", @@ -22,11 +22,6 @@ class LettaRequestConfig(BaseModel): ) -class LettaRequest(BaseModel): - messages: List[MessageCreate] = Field(..., description="The messages to be sent to the agent.") - config: LettaRequestConfig = Field(default=LettaRequestConfig(), description="Configuration options for the LettaRequest.") - - class LettaStreamingRequest(LettaRequest): stream_tokens: bool = Field( default=False, diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 05d6653e0c..6e87e629eb 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -88,6 +88,7 @@ def default_config(cls, model_name: str): model_endpoint="https://api.openai.com/v1", model_wrapper=None, context_window=8192, + put_inner_thoughts_in_kwargs=True, ) elif model_name == "gpt-4o-mini": return cls( @@ -97,6 +98,14 @@ def default_config(cls, model_name: str): model_wrapper=None, context_window=128000, ) + elif model_name == "gpt-4o": + return cls( + model="gpt-4o", + model_endpoint_type="openai", + 
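With the nested config object folded into `LettaRequest`, callers now pass the assistant-message flags at the top level next to `messages`. A small construction sketch; the content string is illustrative:

```python
from letta.schemas.enums import MessageRole
from letta.schemas.letta_request import LettaRequest
from letta.schemas.message import MessageCreate

# Previously these flags lived under request.config; now they sit beside `messages`.
request = LettaRequest(
    messages=[MessageCreate(role=MessageRole.user, content="What's my name?")],
    use_assistant_message=True,  # tool name/kwarg fall back to the send_message defaults
)
print(request.model_dump_json(indent=2))
```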
model_endpoint="https://api.openai.com/v1", + model_wrapper=None, + context_window=128000, + ) elif model_name == "letta": return cls( model="memgpt-openai", diff --git a/letta/schemas/message.py b/letta/schemas/message.py index b865671d03..c9887772d1 100644 --- a/letta/schemas/message.py +++ b/letta/schemas/message.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import copy import json import warnings @@ -25,6 +27,7 @@ ToolReturnMessage, UserMessage, ) +from letta.system import unpack_message from letta.utils import get_utc_time, is_utc_datetime, json_dumps @@ -176,9 +179,47 @@ def to_json(self): json_message["created_at"] = self.created_at.isoformat() return json_message + @staticmethod + def to_letta_messages_from_list( + messages: List[Message], + use_assistant_message: bool = True, + assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL, + assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG, + ) -> List[LettaMessage]: + if use_assistant_message: + message_ids_to_remove = [] + assistant_messages_by_tool_call = { + tool_call.id: msg + for msg in messages + if msg.role == MessageRole.assistant and msg.tool_calls + for tool_call in msg.tool_calls + } + for message in messages: + if ( + message.role == MessageRole.tool + and message.tool_call_id in assistant_messages_by_tool_call + and assistant_messages_by_tool_call[message.tool_call_id].tool_calls + and assistant_message_tool_name + in [tool_call.function.name for tool_call in assistant_messages_by_tool_call[message.tool_call_id].tool_calls] + ): + message_ids_to_remove.append(message.id) + + messages = [msg for msg in messages if msg.id not in message_ids_to_remove] + + # Convert messages to LettaMessages + return [ + msg + for m in messages + for msg in m.to_letta_message( + use_assistant_message=use_assistant_message, + assistant_message_tool_name=assistant_message_tool_name, + assistant_message_tool_kwarg=assistant_message_tool_kwarg, + ) + ] + def to_letta_message( self, - assistant_message: bool = False, + use_assistant_message: bool = False, assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL, assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG, ) -> List[LettaMessage]: @@ -201,7 +242,7 @@ def to_letta_message( for tool_call in self.tool_calls: # If we're supporting using assistant message, # then we want to treat certain function calls as a special case - if assistant_message and tool_call.function.name == assistant_message_tool_name: + if use_assistant_message and tool_call.function.name == assistant_message_tool_name: # We need to unpack the actual message contents from the function call try: func_args = json.loads(tool_call.function.arguments) @@ -264,11 +305,12 @@ def to_letta_message( elif self.role == MessageRole.user: # This is type UserMessage assert self.text is not None, self + message_str = unpack_message(self.text) messages.append( UserMessage( id=self.id, date=self.created_at, - content=self.text, + content=message_str or self.text, ) ) elif self.role == MessageRole.system: @@ -311,26 +353,13 @@ def dict_to_message( assert "tool_call_id" in openai_message_dict, openai_message_dict # Convert from 'function' response to a 'tool' response - # NOTE: this does not conventionally include a tool_call_id, it's on the caster to provide it - message_args = dict( - user_id=user_id, - agent_id=agent_id, - model=model, - # standard fields expected in an OpenAI ChatCompletion message object - role=MessageRole.tool, # NOTE - text=openai_message_dict["content"], - name=openai_message_dict["name"] 
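The new `Message.to_letta_messages_from_list` helper centralizes the `Message` to `LettaMessage` conversion and, when `use_assistant_message` is enabled, drops tool-return rows that merely acknowledge a `send_message` call. A hedged usage sketch; `records` stands in for a `List[Message]` fetched elsewhere (for example from recall storage):

```python
from letta.schemas.message import Message

# `records` is assumed to be a List[Message] obtained from the message store.
letta_messages = Message.to_letta_messages_from_list(
    messages=records,
    use_assistant_message=True,  # hide tool returns that only confirm send_message calls
)
for msg in letta_messages:
    print(type(msg).__name__)
```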
if "name" in openai_message_dict else None, - tool_calls=openai_message_dict["tool_calls"] if "tool_calls" in openai_message_dict else None, - tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None, - created_at=created_at, - ) if id is not None: return Message( agent_id=agent_id, model=model, # standard fields expected in an OpenAI ChatCompletion message object role=MessageRole.tool, # NOTE - content=[TextContent(text=openai_message_dict["content"])], + content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [], name=openai_message_dict["name"] if "name" in openai_message_dict else None, tool_calls=openai_message_dict["tool_calls"] if "tool_calls" in openai_message_dict else None, tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None, @@ -343,7 +372,7 @@ def dict_to_message( model=model, # standard fields expected in an OpenAI ChatCompletion message object role=MessageRole.tool, # NOTE - content=[TextContent(text=openai_message_dict["content"])], + content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [], name=openai_message_dict["name"] if "name" in openai_message_dict else None, tool_calls=openai_message_dict["tool_calls"] if "tool_calls" in openai_message_dict else None, tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None, @@ -375,7 +404,7 @@ def dict_to_message( model=model, # standard fields expected in an OpenAI ChatCompletion message object role=MessageRole(openai_message_dict["role"]), - content=[TextContent(text=openai_message_dict["content"])], + content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [], name=openai_message_dict["name"] if "name" in openai_message_dict else None, tool_calls=tool_calls, tool_call_id=None, # NOTE: None, since this field is only non-null for role=='tool' @@ -388,7 +417,7 @@ def dict_to_message( model=model, # standard fields expected in an OpenAI ChatCompletion message object role=MessageRole(openai_message_dict["role"]), - content=[TextContent(text=openai_message_dict["content"])], + content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [], name=openai_message_dict["name"] if "name" in openai_message_dict else None, tool_calls=tool_calls, tool_call_id=None, # NOTE: None, since this field is only non-null for role=='tool' @@ -420,7 +449,7 @@ def dict_to_message( model=model, # standard fields expected in an OpenAI ChatCompletion message object role=MessageRole(openai_message_dict["role"]), - content=[TextContent(text=openai_message_dict["content"])], + content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [], name=openai_message_dict["name"] if "name" in openai_message_dict else None, tool_calls=tool_calls, tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None, @@ -433,7 +462,7 @@ def dict_to_message( model=model, # standard fields expected in an OpenAI ChatCompletion message object role=MessageRole(openai_message_dict["role"]), - content=[TextContent(text=openai_message_dict["content"] or "")], + content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [], name=openai_message_dict["name"] if "name" in openai_message_dict else None, tool_calls=tool_calls, tool_call_id=openai_message_dict["tool_call_id"] if 
"tool_call_id" in openai_message_dict else None, diff --git a/letta/schemas/openai/chat_completion_response.py b/letta/schemas/openai/chat_completion_response.py index 07a1170354..e41859f0d5 100644 --- a/letta/schemas/openai/chat_completion_response.py +++ b/letta/schemas/openai/chat_completion_response.py @@ -116,7 +116,7 @@ class MessageDelta(BaseModel): content: Optional[str] = None tool_calls: Optional[List[ToolCallDelta]] = None - # role: Optional[str] = None + role: Optional[str] = None function_call: Optional[FunctionCallDelta] = None # Deprecated @@ -132,7 +132,7 @@ class ChatCompletionChunkResponse(BaseModel): id: str choices: List[ChunkChoice] - created: datetime.datetime + created: Union[datetime.datetime, str] model: str # system_fingerprint: str # docs say this is mandatory, but in reality API returns None system_fingerprint: Optional[str] = None diff --git a/letta/schemas/run.py b/letta/schemas/run.py index b455a211f6..acbcccb567 100644 --- a/letta/schemas/run.py +++ b/letta/schemas/run.py @@ -3,8 +3,7 @@ from pydantic import Field from letta.orm.enums import JobType -from letta.schemas.job import Job, JobBase -from letta.schemas.letta_request import LettaRequestConfig +from letta.schemas.job import Job, JobBase, LettaRequestConfig class RunBase(JobBase): diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py index 3617aa5136..60a422ea31 100644 --- a/letta/server/rest_api/app.py +++ b/letta/server/rest_api/app.py @@ -12,7 +12,7 @@ from starlette.middleware.cors import CORSMiddleware from letta.__init__ import __version__ -from letta.constants import ADMIN_PREFIX, API_PREFIX +from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError from letta.log import get_logger from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError @@ -22,6 +22,7 @@ # NOTE(charles): these are extra routes that are not part of v1 but we still need to mount to pass tests from letta.server.rest_api.auth.index import setup_auth_router # TODO: probably remove right? 
from letta.server.rest_api.interface import StreamingServerInterface +from letta.server.rest_api.routers.openai.chat_completions.chat_completions import router as openai_chat_completions_router # from letta.orm.utilities import get_db_session # TODO(ethan) reenable once we merge ORM from letta.server.rest_api.routers.v1 import ROUTERS as v1_routes @@ -241,6 +242,9 @@ async def bedrock_permission_error_handler(request, exc: BedrockPermissionError) app.include_router(users_router, prefix=ADMIN_PREFIX) app.include_router(organizations_router, prefix=ADMIN_PREFIX) + # openai + app.include_router(openai_chat_completions_router, prefix=OPENAI_API_PREFIX) + # /api/auth endpoints app.include_router(setup_auth_router(server, interface, password), prefix=API_PREFIX) diff --git a/letta/server/rest_api/chat_completions_interface.py b/letta/server/rest_api/chat_completions_interface.py new file mode 100644 index 0000000000..1bd2bf2d06 --- /dev/null +++ b/letta/server/rest_api/chat_completions_interface.py @@ -0,0 +1,256 @@ +import asyncio +from collections import deque +from datetime import datetime +from typing import AsyncGenerator, Optional, Union + +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta + +from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG +from letta.local_llm.constants import INNER_THOUGHTS_KWARG +from letta.log import get_logger +from letta.schemas.enums import MessageStreamStatus +from letta.schemas.letta_message import LettaMessage +from letta.schemas.message import Message +from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse +from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser +from letta.streaming_interface import AgentChunkStreamingInterface + +logger = get_logger(__name__) + + +class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface): + """ + Provides an asynchronous streaming mechanism for LLM output. Internally + maintains a queue of chunks that can be consumed via an async generator. + + Key Behaviors: + - process_chunk: Accepts ChatCompletionChunkResponse objects (e.g. from an + OpenAI-like streaming API), potentially transforms them to a partial + text response, and enqueues them. + - get_generator: Returns an async generator that yields messages or status + markers as they become available. + - step_complete, step_yield: End streaming for the current step or entirely, + depending on the multi_step setting. + - function_message, internal_monologue: Handle LLM “function calls” and + “reasoning” messages for non-streaming contexts. + """ + + FINISH_REASON_STR = "stop" + ASSISTANT_STR = "assistant" + + def __init__( + self, + multi_step: bool = True, + timeout: int = 150, + # The following are placeholders for potential expansions; they + # remain if you need to differentiate between actual "assistant messages" + # vs. tool calls. By default, they are set for the "send_message" tool usage. 
+ assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL, + assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG, + inner_thoughts_in_kwargs: bool = True, + inner_thoughts_kwarg: str = INNER_THOUGHTS_KWARG, + ): + self.streaming_mode = True + + # Parsing state for incremental function-call data + self.current_function_name = "" + self.current_function_arguments = [] + + # Internal chunk buffer and event for async notification + self._chunks = deque() + self._event = asyncio.Event() + self._active = True + + # Whether or not the stream should remain open across multiple steps + self.multi_step = multi_step + + # Timing / debug parameters + self.timeout = timeout + + # These are placeholders to handle specialized + # assistant message logic or storing inner thoughts. + self.assistant_message_tool_name = assistant_message_tool_name + self.assistant_message_tool_kwarg = assistant_message_tool_kwarg + self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs + self.inner_thoughts_kwarg = inner_thoughts_kwarg + + async def _create_generator( + self, + ) -> AsyncGenerator[Union[LettaMessage, MessageStreamStatus], None]: + """ + An asynchronous generator that yields queued items as they arrive. + Ends when _active is set to False or when timing out. + """ + while self._active: + try: + await asyncio.wait_for(self._event.wait(), timeout=self.timeout) + except asyncio.TimeoutError: + break + + while self._chunks: + yield self._chunks.popleft() + + self._event.clear() + + def get_generator(self) -> AsyncGenerator: + """ + Provide the async generator interface. Will raise StopIteration + if the stream is inactive. + """ + if not self._active: + raise StopIteration("The stream is not active.") + return self._create_generator() + + def _push_to_buffer( + self, + item: ChatCompletionChunk, + ): + """ + Add an item (a LettaMessage, status marker, or partial chunk) + to the queue and signal waiting consumers. + """ + if not self._active: + raise RuntimeError("Attempted to push to an inactive stream.") + self._chunks.append(item) + self._event.set() + + def stream_start(self) -> None: + """Initialize or reset the streaming state for a new request.""" + self._active = True + self._chunks.clear() + self._event.clear() + self._reset_parsing_state() + + def stream_end(self) -> None: + """ + Clean up after the current streaming session. Typically called when the + request is done or the data source has signaled it has no more data. + """ + self._reset_parsing_state() + + def step_complete(self) -> None: + """ + Indicate that one step of multi-step generation is done. + If multi_step=False, the stream is closed immediately. + """ + if not self.multi_step: + self._active = False + self._event.set() # Ensure waiting generators can finalize + self._reset_parsing_state() + + def step_yield(self) -> None: + """ + Explicitly end the stream in a multi-step scenario, typically + called when the entire chain of steps is complete. + """ + self._active = False + self._event.set() + + @staticmethod + def clear() -> None: + """No-op retained for interface compatibility.""" + return + + def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime) -> None: + """ + Called externally with a ChatCompletionChunkResponse. Transforms + it if necessary, then enqueues partial messages for streaming back. 
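The buffering scheme used by this interface (a `deque` drained by an async generator that waits on an `asyncio.Event`) is a small push-based producer/consumer pattern. Stripped of the Letta-specific pieces it reduces to roughly the following; this is illustrative only, not Letta code:

```python
import asyncio
from collections import deque
from typing import AsyncGenerator


class PushStream:
    """Producer pushes items synchronously; a consumer drains them via an async generator."""

    def __init__(self, timeout: float = 5.0):
        self._chunks: deque = deque()
        self._event = asyncio.Event()
        self._active = True
        self._timeout = timeout

    def push(self, item: str) -> None:
        self._chunks.append(item)
        self._event.set()

    def close(self) -> None:
        self._active = False
        self._event.set()

    async def generator(self) -> AsyncGenerator[str, None]:
        while self._active:
            try:
                await asyncio.wait_for(self._event.wait(), timeout=self._timeout)
            except asyncio.TimeoutError:
                break
            while self._chunks:
                yield self._chunks.popleft()
            self._event.clear()


async def demo() -> None:
    stream = PushStream(timeout=1.0)

    async def producer() -> None:
        for word in ("hello", "streaming", "world"):
            stream.push(word)
            await asyncio.sleep(0.1)
        stream.close()

    asyncio.create_task(producer())
    async for item in stream.generator():
        print(item)


asyncio.run(demo())
```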
+ """ + processed_chunk = self._process_chunk_to_openai_style(chunk) + if processed_chunk is not None: + self._push_to_buffer(processed_chunk) + + def user_message(self, msg: str, msg_obj: Optional[Message] = None) -> None: + """ + Handle user messages. Here, it's a no-op, but included if your + pipeline needs to respond to user messages distinctly. + """ + return + + def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None: + """ + Handle LLM reasoning or internal monologue. Example usage: if you want + to capture chain-of-thought for debugging in a non-streaming scenario. + """ + return + + def assistant_message(self, msg: str, msg_obj: Optional[Message] = None) -> None: + """ + Handle direct assistant messages. This class primarily handles them + as function calls, so it's a no-op by default. + """ + return + + def function_message(self, msg: str, msg_obj: Optional[Message] = None) -> None: + """ + Handle function-related log messages, typically of the form: + It's a no-op by default. + """ + return + + def _process_chunk_to_openai_style(self, chunk: ChatCompletionChunkResponse) -> Optional[ChatCompletionChunk]: + """ + Optionally transform an inbound OpenAI-style chunk so that partial + content (especially from a 'send_message' tool) is exposed as text + deltas in 'content'. Otherwise, pass through or yield finish reasons. + """ + choice = chunk.choices[0] + delta = choice.delta + + # If there's direct content, we usually let it stream as-is + if delta.content is not None: + # TODO: Eventually use all of the native OpenAI objects + return ChatCompletionChunk(**chunk.model_dump(exclude_none=True)) + + # If there's a function call, accumulate its name/args. If it's a known + # text-producing function (like send_message), stream partial text. 
+ if delta.tool_calls: + tool_call = delta.tool_calls[0] + if tool_call.function.name: + self.current_function_name += tool_call.function.name + if tool_call.function.arguments: + self.current_function_arguments.append(tool_call.function.arguments) + + # Only parse arguments for "send_message" to stream partial text + if self.current_function_name.strip() == self.assistant_message_tool_name: + combined_args = "".join(self.current_function_arguments) + parsed_args = OptimisticJSONParser().parse(combined_args) + + # If we can see a "message" field, return it as partial content + if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]: + return ChatCompletionChunk( + id=chunk.id, + object=chunk.object, + created=chunk.created.timestamp(), + model=chunk.model, + choices=[ + Choice( + index=choice.index, + delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR), + finish_reason=None, + ) + ], + ) + + # If there's a finish reason, pass that along + if choice.finish_reason is not None: + return ChatCompletionChunk( + id=chunk.id, + object=chunk.object, + created=chunk.created.timestamp(), + model=chunk.model, + choices=[ + Choice( + index=choice.index, + delta=ChoiceDelta(), + finish_reason=self.FINISH_REASON_STR, + ) + ], + ) + + return None + + def _reset_parsing_state(self) -> None: + """Clears internal buffers for function call name/args.""" + self.current_function_name = "" + self.current_function_arguments = [] diff --git a/letta/server/rest_api/optimistic_json_parser.py b/letta/server/rest_api/optimistic_json_parser.py new file mode 100644 index 0000000000..9379b4e6ec --- /dev/null +++ b/letta/server/rest_api/optimistic_json_parser.py @@ -0,0 +1,185 @@ +import json + + +class OptimisticJSONParser: + """ + A JSON parser that attempts to parse a given string using `json.loads`, + and if that fails, it parses as much valid JSON as possible while + allowing extra tokens to remain. Those extra tokens can be retrieved + from `self.last_parse_reminding`. If `strict` is False, the parser + tries to tolerate incomplete strings and incomplete numbers. + """ + + def __init__(self, strict=True): + self.strict = strict + self.parsers = { + " ": self.parse_space, + "\r": self.parse_space, + "\n": self.parse_space, + "\t": self.parse_space, + "[": self.parse_array, + "{": self.parse_object, + '"': self.parse_string, + "t": self.parse_true, + "f": self.parse_false, + "n": self.parse_null, + } + # Register number parser for digits and signs + for char in "0123456789.-": + self.parsers[char] = self.parse_number + + self.last_parse_reminding = None + self.on_extra_token = self.default_on_extra_token + + def default_on_extra_token(self, text, data, reminding): + pass + + def parse(self, input_str): + """ + Try to parse the entire `input_str` as JSON. If parsing fails, + attempts a partial parse, storing leftover text in + `self.last_parse_reminding`. A callback (`on_extra_token`) is + triggered if extra tokens remain. 
+ """ + if len(input_str) >= 1: + try: + return json.loads(input_str) + except json.JSONDecodeError as decode_error: + data, reminding = self.parse_any(input_str, decode_error) + self.last_parse_reminding = reminding + if self.on_extra_token and reminding: + self.on_extra_token(input_str, data, reminding) + return data + else: + return json.loads("{}") + + def parse_any(self, input_str, decode_error): + """Determine which parser to use based on the first character.""" + if not input_str: + raise decode_error + parser = self.parsers.get(input_str[0]) + if parser is None: + raise decode_error + return parser(input_str, decode_error) + + def parse_space(self, input_str, decode_error): + """Strip leading whitespace and parse again.""" + return self.parse_any(input_str.strip(), decode_error) + + def parse_array(self, input_str, decode_error): + """Parse a JSON array, returning the list and remaining string.""" + # Skip the '[' + input_str = input_str[1:] + array_values = [] + input_str = input_str.strip() + while input_str: + if input_str[0] == "]": + # Skip the ']' + input_str = input_str[1:] + break + value, input_str = self.parse_any(input_str, decode_error) + array_values.append(value) + input_str = input_str.strip() + if input_str.startswith(","): + # Skip the ',' + input_str = input_str[1:].strip() + return array_values, input_str + + def parse_object(self, input_str, decode_error): + """Parse a JSON object, returning the dict and remaining string.""" + # Skip the '{' + input_str = input_str[1:] + obj = {} + input_str = input_str.strip() + while input_str: + if input_str[0] == "}": + # Skip the '}' + input_str = input_str[1:] + break + key, input_str = self.parse_any(input_str, decode_error) + input_str = input_str.strip() + + if not input_str or input_str[0] == "}": + obj[key] = None + break + if input_str[0] != ":": + raise decode_error + + # Skip ':' + input_str = input_str[1:].strip() + if not input_str or input_str[0] in ",}": + obj[key] = None + if input_str.startswith(","): + input_str = input_str[1:] + break + + value, input_str = self.parse_any(input_str, decode_error) + obj[key] = value + input_str = input_str.strip() + if input_str.startswith(","): + # Skip the ',' + input_str = input_str[1:].strip() + return obj, input_str + + def parse_string(self, input_str, decode_error): + """Parse a JSON string, respecting escaped quotes if present.""" + end = input_str.find('"', 1) + while end != -1 and input_str[end - 1] == "\\": + end = input_str.find('"', end + 1) + + if end == -1: + # Incomplete string + if not self.strict: + return input_str[1:], "" + return json.loads(f'"{input_str[1:]}"'), "" + + str_val = input_str[: end + 1] + input_str = input_str[end + 1 :] + if not self.strict: + return str_val[1:-1], input_str + return json.loads(str_val), input_str + + def parse_number(self, input_str, decode_error): + """ + Parse a number (int or float). Allows digits, '.', '-', but + doesn't fully validate complex exponents unless they appear + before a non-number character. 
+ """ + idx = 0 + while idx < len(input_str) and input_str[idx] in "0123456789.-": + idx += 1 + + num_str = input_str[:idx] + remainder = input_str[idx:] + + # If it's only a sign or just '.', return as-is with empty remainder + if not num_str or num_str in {"-", "."}: + return num_str, "" + + try: + if num_str.endswith("."): + num = int(num_str[:-1]) + else: + num = float(num_str) if any(c in num_str for c in ".eE") else int(num_str) + except ValueError: + raise decode_error + + return num, remainder + + def parse_true(self, input_str, decode_error): + """Parse a 'true' value.""" + if input_str.startswith(("t", "T")): + return True, input_str[4:] + raise decode_error + + def parse_false(self, input_str, decode_error): + """Parse a 'false' value.""" + if input_str.startswith(("f", "F")): + return False, input_str[5:] + raise decode_error + + def parse_null(self, input_str, decode_error): + """Parse a 'null' value.""" + if input_str.startswith("n"): + return None, input_str[4:] + raise decode_error diff --git a/letta/server/rest_api/routers/openai/chat_completions/__init__.py b/letta/server/rest_api/routers/openai/chat_completions/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py b/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py new file mode 100644 index 0000000000..883af9429c --- /dev/null +++ b/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py @@ -0,0 +1,161 @@ +import asyncio +from typing import TYPE_CHECKING, Iterable, List, Optional, Union, cast + +from fastapi import APIRouter, Body, Depends, Header, HTTPException +from fastapi.responses import StreamingResponse +from openai.types.chat import ChatCompletionMessageParam +from openai.types.chat.completion_create_params import CompletionCreateParams + +from letta.agent import Agent +from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG +from letta.log import get_logger +from letta.schemas.message import MessageCreate +from letta.schemas.openai.chat_completion_response import Message +from letta.schemas.user import User +from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface + +# TODO this belongs in a controller! 
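The point of the parser is to pull usable fields out of a tool-call argument string that is still being streamed. A quick usage sketch against the class defined above, with expected outputs shown as comments:

```python
from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser

parser = OptimisticJSONParser()

# A send_message argument payload that is still arriving (no closing quote or brace yet)
print(parser.parse('{"message": "Hello, wor'))   # {'message': 'Hello, wor'}
print(parser.last_parse_reminding)               # '' (nothing left over in this case)

# Complete JSON still goes straight through json.loads
print(parser.parse('{"message": "done"}'))       # {'message': 'done'}
```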
+from letta.server.rest_api.utils import get_letta_server, sse_async_generator + +if TYPE_CHECKING: + from letta.server.server import SyncServer + +router = APIRouter(prefix="/v1", tags=["chat_completions"]) + +logger = get_logger(__name__) + + +@router.post( + "/chat/completions", + response_model=None, + operation_id="create_chat_completions", + responses={ + 200: { + "description": "Successful response", + "content": { + "text/event-stream": {"description": "Server-Sent Events stream"}, + }, + } + }, +) +async def create_chat_completions( + completion_request: CompletionCreateParams = Body(...), + server: "SyncServer" = Depends(get_letta_server), + user_id: Optional[str] = Header(None, alias="user_id"), +): + # Validate and process fields + try: + messages = list(cast(Iterable[ChatCompletionMessageParam], completion_request["messages"])) + except KeyError: + # Handle the case where "messages" is not present in the request + raise HTTPException(status_code=400, detail="The 'messages' field is missing in the request.") + except TypeError: + # Handle the case where "messages" is not iterable + raise HTTPException(status_code=400, detail="The 'messages' field must be an iterable.") + except Exception as e: + # Catch any other unexpected errors and include the exception message + raise HTTPException(status_code=400, detail=f"An error occurred while processing 'messages': {str(e)}") + + if messages[-1]["role"] != "user": + logger.error(f"The last message does not have a `user` role: {messages}") + raise HTTPException(status_code=400, detail="'messages[-1].role' must be a 'user'") + + input_message = messages[-1] + if not isinstance(input_message["content"], str): + logger.error(f"The input message does not have valid content: {input_message}") + raise HTTPException(status_code=400, detail="'messages[-1].content' must be a 'string'") + + # Process remaining fields + if not completion_request["stream"]: + raise HTTPException(status_code=400, detail="Must be streaming request: `stream` was set to `False` in the request.") + + actor = server.user_manager.get_user_or_default(user_id=user_id) + + agent_id = str(completion_request.get("user", None)) + if agent_id is None: + error_msg = "Must pass agent_id in the 'user' field" + logger.error(error_msg) + raise HTTPException(status_code=400, detail=error_msg) + + letta_agent = server.load_agent(agent_id=agent_id, actor=actor) + llm_config = letta_agent.agent_state.llm_config + if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint: + error_msg = f"You can only use models with type 'openai' for chat completions. 
This agent {agent_id} has llm_config: \n{llm_config.model_dump_json(indent=4)}" + logger.error(error_msg) + raise HTTPException(status_code=400, detail=error_msg) + + model = completion_request.get("model") + if model != llm_config.model: + warning_msg = f"The requested model {model} is different from the model specified in this agent's ({agent_id}) llm_config: \n{llm_config.model_dump_json(indent=4)}" + logger.warning(f"Defaulting to {llm_config.model}...") + logger.warning(warning_msg) + + logger.info(f"Received input message: {input_message}") + + return await send_message_to_agent_chat_completions( + server=server, + letta_agent=letta_agent, + actor=actor, + messages=[MessageCreate(role=input_message["role"], content=input_message["content"])], + ) + + +async def send_message_to_agent_chat_completions( + server: "SyncServer", + letta_agent: Agent, + actor: User, + messages: Union[List[Message], List[MessageCreate]], + assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL, + assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG, +) -> StreamingResponse: + """Split off into a separate function so that it can be imported in the /chat/completion proxy.""" + # For streaming response + try: + # TODO: cleanup this logic + llm_config = letta_agent.agent_state.llm_config + + # Create a new interface per request + letta_agent.interface = ChatCompletionsStreamingInterface() + streaming_interface = letta_agent.interface + if not isinstance(streaming_interface, ChatCompletionsStreamingInterface): + raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}") + + # Allow AssistantMessage is desired by client + streaming_interface.assistant_message_tool_name = assistant_message_tool_name + streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg + + # Related to JSON buffer reader + streaming_interface.inner_thoughts_in_kwargs = ( + llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False + ) + + # Offload the synchronous message_func to a separate thread + streaming_interface.stream_start() + asyncio.create_task( + asyncio.to_thread( + server.send_messages, + actor=actor, + agent_id=letta_agent.agent_state.id, + messages=messages, + interface=streaming_interface, + ) + ) + + # return a stream + return StreamingResponse( + sse_async_generator( + streaming_interface.get_generator(), + usage_task=None, + finish_message=True, + ), + media_type="text/event-stream", + ) + + except HTTPException: + raise + except Exception as e: + print(e) + import traceback + + traceback.print_exc() + raise HTTPException(status_code=500, detail=f"{e}") diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index 50a9f1b3ae..458e8fe450 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Annotated, List, Optional, Union +from typing import Annotated, List, Optional from fastapi import APIRouter, BackgroundTasks, Body, Depends, Header, HTTPException, Query, status from fastapi.responses import JSONResponse @@ -10,7 +10,7 @@ from letta.orm.errors import NoResultFound from letta.schemas.agent import AgentState, CreateAgent, UpdateAgent from letta.schemas.block import Block, BlockUpdate, CreateBlock # , BlockLabelUpdate, BlockLimitUpdate -from letta.schemas.job import JobStatus, JobUpdate +from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig 
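End to end, the new route is meant to be reachable with a stock OpenAI client pointed at the Letta server: the agent ID travels in the `user` field, streaming is mandatory, and the last message must have the `user` role. The base URL below is an assumption (the value of `OPENAI_API_PREFIX` is not shown in this patch), and the agent ID is a placeholder:

```python
from openai import OpenAI

# Assumed base path; adjust to wherever the server mounts OPENAI_API_PREFIX.
client = OpenAI(base_url="http://localhost:8283/openai/v1", api_key="unused")

stream = client.chat.completions.create(
    model="gpt-4o-mini",                 # should match the agent's llm_config, else the server logs a warning
    user="agent-00000000-placeholder",   # the handler reads the target agent ID from `user`
    stream=True,                         # non-streaming requests are rejected with a 400
    messages=[{"role": "user", "content": "What's my name?"}],
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")
```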
from letta.schemas.letta_message import LettaMessageUnion from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest from letta.schemas.letta_response import LettaResponse @@ -392,15 +392,7 @@ def delete_archival_memory( AgentMessagesResponse = Annotated[ - Union[List[Message], List[LettaMessageUnion]], - Field( - json_schema_extra={ - "anyOf": [ - {"type": "array", "items": {"$ref": "#/components/schemas/Message"}}, - {"type": "array", "items": {"$ref": "#/components/schemas/LettaMessageUnion"}}, - ] - } - ), + List[LettaMessageUnion], Field(json_schema_extra={"type": "array", "items": {"$ref": "#/components/schemas/LettaMessageUnion"}}) ] @@ -411,16 +403,9 @@ def list_messages( after: Optional[str] = Query(None, description="Message after which to retrieve the returned messages."), before: Optional[str] = Query(None, description="Message before which to retrieve the returned messages."), limit: int = Query(10, description="Maximum number of messages to retrieve."), - msg_object: bool = Query(False, description="If true, returns Message objects. If false, return LettaMessage objects."), - # Flags to support the use of AssistantMessage message types - assistant_message_tool_name: str = Query( - DEFAULT_MESSAGE_TOOL, - description="The name of the designated message tool.", - ), - assistant_message_tool_kwarg: str = Query( - DEFAULT_MESSAGE_TOOL_KWARG, - description="The name of the message argument in the designated message tool.", - ), + use_assistant_message: bool = Query(True, description="Whether to use assistant messages"), + assistant_message_tool_name: str = Query(DEFAULT_MESSAGE_TOOL, description="The name of the designated message tool."), + assistant_message_tool_kwarg: str = Query(DEFAULT_MESSAGE_TOOL_KWARG, description="The name of the message argument."), user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present ): """ @@ -435,7 +420,8 @@ def list_messages( before=before, limit=limit, reverse=True, - return_message_object=msg_object, + return_message_object=False, + use_assistant_message=use_assistant_message, assistant_message_tool_name=assistant_message_tool_name, assistant_message_tool_kwarg=assistant_message_tool_kwarg, ) @@ -480,9 +466,9 @@ async def send_message( stream_steps=False, stream_tokens=False, # Support for AssistantMessage - use_assistant_message=request.config.use_assistant_message, - assistant_message_tool_name=request.config.assistant_message_tool_name, - assistant_message_tool_kwarg=request.config.assistant_message_tool_kwarg, + use_assistant_message=request.use_assistant_message, + assistant_message_tool_name=request.assistant_message_tool_name, + assistant_message_tool_kwarg=request.assistant_message_tool_kwarg, ) return result @@ -520,9 +506,9 @@ async def send_message_streaming( stream_steps=True, stream_tokens=request.stream_tokens, # Support for AssistantMessage - use_assistant_message=request.config.use_assistant_message, - assistant_message_tool_name=request.config.assistant_message_tool_name, - assistant_message_tool_kwarg=request.config.assistant_message_tool_kwarg, + use_assistant_message=request.use_assistant_message, + assistant_message_tool_name=request.assistant_message_tool_name, + assistant_message_tool_kwarg=request.assistant_message_tool_kwarg, ) return result @@ -597,7 +583,11 @@ async def send_message_async( "job_type": "send_message_async", "agent_id": agent_id, }, - request_config=request.config, + request_config=LettaRequestConfig( + 
use_assistant_message=request.use_assistant_message, + assistant_message_tool_name=request.assistant_message_tool_name, + assistant_message_tool_kwarg=request.assistant_message_tool_kwarg, + ), ) run = server.job_manager.create_job(pydantic_job=run, actor=actor) @@ -609,9 +599,9 @@ async def send_message_async( actor=actor, agent_id=agent_id, messages=request.messages, - use_assistant_message=request.config.use_assistant_message, - assistant_message_tool_name=request.config.assistant_message_tool_name, - assistant_message_tool_kwarg=request.config.assistant_message_tool_kwarg, + use_assistant_message=request.use_assistant_message, + assistant_message_tool_name=request.assistant_message_tool_name, + assistant_message_tool_kwarg=request.assistant_message_tool_kwarg, ) return run diff --git a/letta/server/server.py b/letta/server/server.py index c9780fdd75..4ff2bccfdb 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -62,6 +62,7 @@ from letta.schemas.tool import Tool from letta.schemas.usage import LettaUsageStatistics from letta.schemas.user import User +from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface from letta.server.rest_api.interface import StreamingServerInterface from letta.server.rest_api.utils import sse_async_generator from letta.services.agent_manager import AgentManager @@ -206,7 +207,7 @@ def db_error_handler(): else: # TODO: don't rely on config storage engine_path = "sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db") - print("Creating sqlite engine", engine_path) + logger.info("Creating sqlite engine " + engine_path) engine = create_engine(engine_path) @@ -719,7 +720,7 @@ def send_messages( # whether or not to wrap user and system message as MemGPT-style stringified JSON wrap_user_message: bool = True, wrap_system_message: bool = True, - interface: Union[AgentInterface, None] = None, # needed to getting responses + interface: Union[AgentInterface, ChatCompletionsStreamingInterface, None] = None, # needed to getting responses metadata: Optional[dict] = None, # Pass through metadata to interface ) -> LettaUsageStatistics: """Send a list of messages to the agent @@ -735,7 +736,7 @@ def send_messages( for message in messages: assert isinstance(message, MessageCreate) - # If wrapping is eanbled, wrap with metadata before placing content inside the Message object + # If wrapping is enabled, wrap with metadata before placing content inside the Message object if message.role == MessageRole.user and wrap_user_message: message.content = system.package_user_message(user_message=message.content) elif message.role == MessageRole.system and wrap_system_message: @@ -870,6 +871,7 @@ def get_agent_recall( limit: Optional[int] = 100, reverse: Optional[bool] = False, return_message_object: bool = True, + use_assistant_message: bool = True, assistant_message_tool_name: str = constants.DEFAULT_MESSAGE_TOOL, assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG, ) -> Union[List[Message], List[LettaMessage]]: @@ -889,14 +891,12 @@ def get_agent_recall( ) if not return_message_object: - records = [ - msg - for m in records - for msg in m.to_letta_message( - assistant_message_tool_name=assistant_message_tool_name, - assistant_message_tool_kwarg=assistant_message_tool_kwarg, - ) - ] + records = Message.to_letta_messages_from_list( + messages=records, + use_assistant_message=use_assistant_message, + assistant_message_tool_name=assistant_message_tool_name, + 
assistant_message_tool_kwarg=assistant_message_tool_kwarg, + ) if reverse: records = records[::-1] @@ -1289,7 +1289,7 @@ async def send_message_to_agent( llm_config.model_endpoint_type not in ["openai", "anthropic"] or "inference.memgpt.ai" in llm_config.model_endpoint ): warnings.warn( - "Token streaming is only supported for models with type 'openai', 'anthropic', or `inference.memgpt.ai` in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False." + f"Token streaming is only supported for models with type 'openai' or 'anthropic' in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False." ) stream_tokens = False diff --git a/letta/services/job_manager.py b/letta/services/job_manager.py index 59877ee055..543c1536fb 100644 --- a/letta/services/job_manager.py +++ b/letta/services/job_manager.py @@ -14,9 +14,8 @@ from letta.orm.step import Step from letta.schemas.enums import JobStatus, MessageRole from letta.schemas.job import Job as PydanticJob -from letta.schemas.job import JobUpdate +from letta.schemas.job import JobUpdate, LettaRequestConfig from letta.schemas.letta_message import LettaMessage -from letta.schemas.letta_request import LettaRequestConfig from letta.schemas.message import Message as PydanticMessage from letta.schemas.run import Run as PydanticRun from letta.schemas.usage import LettaUsageStatistics @@ -303,16 +302,12 @@ def get_run_messages( request_config = self._get_run_request_config(run_id) - # Convert messages to LettaMessages - messages = [ - msg - for m in messages - for msg in m.to_letta_message( - assistant_message=request_config["use_assistant_message"], - assistant_message_tool_name=request_config["assistant_message_tool_name"], - assistant_message_tool_kwarg=request_config["assistant_message_tool_kwarg"], - ) - ] + messages = PydanticMessage.to_letta_messages_from_list( + messages=messages, + use_assistant_message=request_config["use_assistant_message"], + assistant_message_tool_name=request_config["assistant_message_tool_name"], + assistant_message_tool_kwarg=request_config["assistant_message_tool_kwarg"], + ) return messages diff --git a/letta/services/tool_manager.py b/letta/services/tool_manager.py index 01e4c8557b..2e831f9241 100644 --- a/letta/services/tool_manager.py +++ b/letta/services/tool_manager.py @@ -4,6 +4,7 @@ from letta.constants import BASE_MEMORY_TOOLS, BASE_TOOLS, MULTI_AGENT_TOOLS from letta.functions.functions import derive_openai_json_schema, load_function_set +from letta.log import get_logger from letta.orm.enums import ToolType # TODO: Remove this once we translate all of these to the ORM @@ -14,6 +15,8 @@ from letta.schemas.user import User as PydanticUser from letta.utils import enforce_types, printd +logger = get_logger(__name__) + class ToolManager: """Manager class to handle business logic related to Tools.""" @@ -102,7 +105,20 @@ def list_tools(self, actor: PydanticUser, after: Optional[str] = None, limit: Op limit=limit, organization_id=actor.organization_id, ) - return [tool.to_pydantic() for tool in tools] + + # Remove any malformed tools + results = [] + for tool in tools: + try: + pydantic_tool = tool.to_pydantic() + results.append(pydantic_tool) + except (ValueError, ModuleNotFoundError, AttributeError) as e: + logger.warning(f"Deleting malformed tool with id={tool.id} and name={tool.name}, error was:\n{e}") + logger.warning("Deleted tool: ") + 
logger.warning(tool.pretty_print_columns()) + self.delete_tool_by_id(tool.id, actor=actor) + + return results @enforce_types def update_tool_by_id(self, tool_id: str, tool_update: ToolUpdate, actor: PydanticUser) -> PydanticTool: diff --git a/letta/system.py b/letta/system.py index 9c795704c7..a13e36f1ee 100644 --- a/letta/system.py +++ b/letta/system.py @@ -1,5 +1,6 @@ import json import uuid +import warnings from typing import Optional from .constants import ( @@ -205,3 +206,22 @@ def get_token_limit_warning(): } return json_dumps(packaged_message) + + +def unpack_message(packed_message) -> str: + """Take a packed message string and attempt to extract the inner message content""" + + try: + message_json = json.loads(packed_message) + except: + warnings.warn(f"Was unable to load message as JSON to unpack: ''{packed_message}") + return packed_message + + if "message" not in message_json: + if "type" in message_json and message_json["type"] in ["login", "heartbeat"]: + # This is a valid user message that the ADE expects, so don't print warning + return packed_message + warnings.warn(f"Was unable to find 'message' field in packed message object: '{packed_message}'") + return packed_message + else: + return message_json.get("message") diff --git a/letta/utils.py b/letta/utils.py index 18a5093ad5..171391e37d 100644 --- a/letta/utils.py +++ b/letta/utils.py @@ -1,3 +1,4 @@ +import asyncio import copy import difflib import hashlib @@ -15,7 +16,7 @@ from contextlib import contextmanager from datetime import datetime, timedelta, timezone from functools import wraps -from typing import List, Union, _GenericAlias, get_args, get_origin, get_type_hints +from typing import Any, Coroutine, List, Union, _GenericAlias, get_args, get_origin, get_type_hints from urllib.parse import urljoin, urlparse import demjson3 as demjson @@ -1127,3 +1128,25 @@ def get_friendly_error_msg(function_name: str, exception_name: str, exception_me if len(error_msg) > MAX_ERROR_MESSAGE_CHAR_LIMIT: error_msg = error_msg[:MAX_ERROR_MESSAGE_CHAR_LIMIT] return error_msg + + +def run_async_task(coro: Coroutine[Any, Any, Any]) -> Any: + """ + Safely runs an asynchronous coroutine in a synchronous context. + + If an event loop is already running, it uses `asyncio.ensure_future`. + Otherwise, it creates a new event loop and runs the coroutine. + + Args: + coro: The coroutine to execute. + + Returns: + The result of the coroutine. + """ + try: + # If there's already a running event loop, schedule the coroutine + loop = asyncio.get_running_loop() + return asyncio.run_until_complete(coro) if loop.is_closed() else asyncio.ensure_future(coro) + except RuntimeError: + # If no event loop is running, create a new one + return asyncio.run(coro) diff --git a/poetry.lock b/poetry.lock index d25f2ecb60..707c859353 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -249,13 +249,13 @@ files = [ [[package]] name = "attrs" -version = "24.3.0" +version = "25.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" files = [ - {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"}, - {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"}, + {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, + {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, ] [package.extras] @@ -779,13 +779,13 @@ test = ["pytest"] [[package]] name = "composio-core" -version = "0.6.16" +version = "0.6.19" description = "Core package to act as a bridge between composio platform and other services." optional = false python-versions = "<4,>=3.9" files = [ - {file = "composio_core-0.6.16-py3-none-any.whl", hash = "sha256:1b43fa77a7260c065e9e7b0222d42935b54a25e926a4a61fe2084d7d9d373d4b"}, - {file = "composio_core-0.6.16.tar.gz", hash = "sha256:dee0f72fa7d58e660325940308c46365e28d6a068ba777d1eb7f6c545b6fa8b7"}, + {file = "composio_core-0.6.19-py3-none-any.whl", hash = "sha256:0b52126fa191934f3ea28920d5e9f87932d1ae779736aa6f94694535fe0cf1fe"}, + {file = "composio_core-0.6.19.tar.gz", hash = "sha256:431bc9665590c7c737f5cb5bb5d88689cfbacfce978219fce79bd1969d967325"}, ] [package.dependencies] @@ -800,6 +800,7 @@ paramiko = ">=3.4.1" pydantic = ">=2.6.4" pyperclip = ">=1.8.2,<2" pysher = "1.0.8" +pyyaml = ">=6.0.2" requests = ">=2.31.0,<3" rich = ">=13.7.1,<14" semver = ">=2.13.0" @@ -807,7 +808,7 @@ sentry-sdk = ">=2.0.0" uvicorn = "*" [package.extras] -all = ["aiohttp", "click", "diskcache", "docker (>=7.1.0)", "e2b (>=0.17.2a37,<1)", "e2b-code-interpreter", "fastapi", "flake8", "gql", "importlib-metadata (>=4.8.1)", "inflection (>=0.5.1)", "jsonref (>=1.1.0)", "jsonschema (>=4.21.1,<5)", "networkx", "paramiko (>=3.4.1)", "pathspec", "pydantic (>=2.6.4)", "pygments", "pyperclip (>=1.8.2,<2)", "pysher (==1.0.8)", "requests (>=2.31.0,<3)", "requests_toolbelt", "rich (>=13.7.1,<14)", "ruff", "semver (>=2.13.0)", "sentry-sdk (>=2.0.0)", "transformers", "uvicorn"] +all = ["aiohttp", "click", "diskcache", "docker (>=7.1.0)", "e2b (>=0.17.2a37,<1)", "e2b-code-interpreter", "fastapi", "flake8", "gql", "importlib-metadata (>=4.8.1)", "inflection (>=0.5.1)", "jsonref (>=1.1.0)", "jsonschema (>=4.21.1,<5)", "networkx", "paramiko (>=3.4.1)", "pathspec", "pydantic (>=2.6.4)", "pygments", "pyperclip (>=1.8.2,<2)", "pysher (==1.0.8)", "pyyaml (>=6.0.2)", "requests (>=2.31.0,<3)", "requests_toolbelt", "rich (>=13.7.1,<14)", "ruff", "semver (>=2.13.0)", "sentry-sdk (>=2.0.0)", "transformers", "uvicorn"] docker = ["docker (>=7.1.0)"] e2b = ["e2b (>=0.17.2a37,<1)", "e2b-code-interpreter"] flyio = ["gql", "requests_toolbelt"] @@ -815,17 +816,17 @@ tools = ["diskcache", "flake8", "networkx", "pathspec", "pygments", "ruff", "tra [[package]] name = "composio-langchain" -version = "0.6.16" +version = "0.6.19" description = "Use Composio to get an array of tools with your LangChain agent." 
optional = false python-versions = "<4,>=3.9" files = [ - {file = "composio_langchain-0.6.16-py3-none-any.whl", hash = "sha256:1d595224897dffda64bb255fdf6fa82ce56df08c80fc82083af8e2456ea63c26"}, - {file = "composio_langchain-0.6.16.tar.gz", hash = "sha256:e8dd1c1de4e717d3fc502d13590b65527b2d33a9e72f77615c230b441d1963ef"}, + {file = "composio_langchain-0.6.19-py3-none-any.whl", hash = "sha256:d0811956fe22bfa20d08828edca1757523730a6a02e6021e8ce3509c926c7f9b"}, + {file = "composio_langchain-0.6.19.tar.gz", hash = "sha256:17b8c7ee042c0cf2c154772d742fe19e9d79a7e9e2a32d382d6f722b2104d671"}, ] [package.dependencies] -composio_core = ">=0.6.11,<0.7.0" +composio_core = ">=0.6.0,<0.7.0" langchain = ">=0.1.0" langchain-openai = ">=0.0.2.post1" langchainhub = ">=0.1.15" @@ -1012,20 +1013,20 @@ files = [ [[package]] name = "deprecated" -version = "1.2.15" +version = "1.2.18" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ - {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"}, - {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"}, + {file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"}, + {file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"}, ] [package.dependencies] wrapt = ">=1.10,<2" [package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "jinja2 (>=3.0.3,<3.1.0)", "setuptools", "sphinx (<2)", "tox"] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] [[package]] name = "dill" @@ -1120,13 +1121,13 @@ files = [ [[package]] name = "e2b" -version = "1.0.5" +version = "1.0.6" description = "E2B SDK that give agents cloud environments" optional = true python-versions = "<4.0,>=3.8" files = [ - {file = "e2b-1.0.5-py3-none-any.whl", hash = "sha256:a71bdec46f33d3e38e87d475d7fd2939bd7b6b753b819c9639ca211cd375b79e"}, - {file = "e2b-1.0.5.tar.gz", hash = "sha256:43c82705af7b7d4415c2510ff77dab4dc075351e0b769d6adf8e0d7bb4868d13"}, + {file = "e2b-1.0.6-py3-none-any.whl", hash = "sha256:4ae6e00d46e6b0b9ab05388c408f9155488ee9f022c5a6fd47939f492ccf3b58"}, + {file = "e2b-1.0.6.tar.gz", hash = "sha256:e35d47f5581565060a5c18e4cb839cf61de310d275fa0a6589d8fc8bf65957a7"}, ] [package.dependencies] @@ -1910,13 +1911,13 @@ files = [ [[package]] name = "huggingface-hub" -version = "0.27.1" +version = "0.28.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = true python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec"}, - {file = "huggingface_hub-0.27.1.tar.gz", hash = "sha256:c004463ca870283909d715d20f066ebd6968c2207dae9393fdffb3c1d4d8f98b"}, + {file = "huggingface_hub-0.28.0-py3-none-any.whl", hash = "sha256:71cff4e500efe68061d94b7f6d3114e183715088be7a90bf4dd84af83b5f5cdb"}, + {file = "huggingface_hub-0.28.0.tar.gz", hash = "sha256:c2b18c02a47d4384763caddb4d0ab2a8fc6c16e0800d6de4d55d0a896244aba3"}, ] [package.dependencies] @@ -1929,13 +1930,13 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", 
"gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] inference = ["aiohttp"] -quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.5.0)"] +quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.9.0)"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] @@ -2385,19 +2386,19 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout" [[package]] name = "langchain" -version = "0.3.15" +version = "0.3.16" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langchain-0.3.15-py3-none-any.whl", hash = "sha256:2657735184054cae8181ac43fce6cbc9ee64ca81a2ad2aed3ccd6e5d6fe1f19f"}, - {file = "langchain-0.3.15.tar.gz", hash = "sha256:1204d67f8469cd8da5621d2b39501650a824d4c0d5a74264dfe3df9a7528897e"}, + {file = "langchain-0.3.16-py3-none-any.whl", hash = "sha256:9a9c1a0604b599e929a5a823ee1491065dc8758fc1802d3df344214ab765f555"}, + {file = "langchain-0.3.16.tar.gz", hash = "sha256:17d35ee6991e0ebd980c1be86c34b2d48e961213ca89e7b585f6333c90cdbdb4"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} -langchain-core = ">=0.3.31,<0.4.0" +langchain-core = ">=0.3.32,<0.4.0" langchain-text-splitters = ">=0.3.3,<0.4.0" langsmith = 
">=0.1.17,<0.4" numpy = [ @@ -2412,21 +2413,21 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10" [[package]] name = "langchain-community" -version = "0.3.15" +version = "0.3.16" description = "Community contributed LangChain integrations." optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_community-0.3.15-py3-none-any.whl", hash = "sha256:5b6ac359f75922a826566f94eb9a9b5c763cc78f395f0baf2f5638e62fdae1dd"}, - {file = "langchain_community-0.3.15.tar.gz", hash = "sha256:c2fee46a0ea1b94c475bd4263edb53d5615dbe37c5263480bf55cb8e465ac235"}, + {file = "langchain_community-0.3.16-py3-none-any.whl", hash = "sha256:a702c577b048d48882a46708bb3e08ca9aec79657c421c3241a305409040c0d6"}, + {file = "langchain_community-0.3.16.tar.gz", hash = "sha256:825709bc328e294942b045d0b7f55053e8e88f7f943576306d778cf56417126c"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" dataclasses-json = ">=0.5.7,<0.7" httpx-sse = ">=0.4.0,<0.5.0" -langchain = ">=0.3.15,<0.4.0" -langchain-core = ">=0.3.31,<0.4.0" +langchain = ">=0.3.16,<0.4.0" +langchain-core = ">=0.3.32,<0.4.0" langsmith = ">=0.1.125,<0.4" numpy = [ {version = ">=1.22.4,<2", markers = "python_version < \"3.12\""}, @@ -2440,13 +2441,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10" [[package]] name = "langchain-core" -version = "0.3.31" +version = "0.3.32" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_core-0.3.31-py3-none-any.whl", hash = "sha256:882e64ad95887c951dce8e835889e43263b11848c394af3b73e06912624bd743"}, - {file = "langchain_core-0.3.31.tar.gz", hash = "sha256:5ffa56354c07de9efaa4139609659c63e7d9b29da2c825f6bab9392ec98300df"}, + {file = "langchain_core-0.3.32-py3-none-any.whl", hash = "sha256:c050bd1e6dd556ae49073d338aca9dca08b7b55f4778ddce881a12224bc82a7e"}, + {file = "langchain_core-0.3.32.tar.gz", hash = "sha256:4eb85d8428585e67a1766e29c6aa2f246c6329d97cb486e8d6f564ab0bd94a4f"}, ] [package.dependencies] @@ -2509,13 +2510,13 @@ types-requests = ">=2.31.0.2,<3.0.0.0" [[package]] name = "langsmith" -version = "0.3.1" +version = "0.3.2" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langsmith-0.3.1-py3-none-any.whl", hash = "sha256:b6afbb214ae82b6d96b8134718db3a7d2598b2a7eb4ab1212bcd6d96e04eda10"}, - {file = "langsmith-0.3.1.tar.gz", hash = "sha256:9242a49d37e2176a344ddec97bf57b958dc0e1f0437e150cefd0fb70195f0e26"}, + {file = "langsmith-0.3.2-py3-none-any.whl", hash = "sha256:48ff6bc5eda62f4729596bb68d4f96166d2654728ac32970b69b1be874c61925"}, + {file = "langsmith-0.3.2.tar.gz", hash = "sha256:7724668e9705734ab25a7977fc34a9ee15a40ba4108987926c69293a05d40229"}, ] [package.dependencies] @@ -2535,13 +2536,13 @@ pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"] [[package]] name = "letta-client" -version = "0.1.22" +version = "0.1.23" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "letta_client-0.1.22-py3-none-any.whl", hash = "sha256:6a108ac6d4cb1c79870a1defffcb6eb1ea6eca6da071f0b730f044a96d482c01"}, - {file = "letta_client-0.1.22.tar.gz", hash = "sha256:75483fc41fb3baf1170b11c44c25b45d62c439b8b9a9720601446a9b83ee636e"}, + {file = "letta_client-0.1.23-py3-none-any.whl", hash = "sha256:755c78e99d9e69589c333c9e362e08a75d6edac379fc0eb8265adb7546fffda7"}, + {file = "letta_client-0.1.23.tar.gz", hash = "sha256:d3b0d5bde93827a700f23325f4f9fbd9dc0d0789aaced9f0511e9e6fb6d23446"}, ] [package.dependencies] @@ -2553,13 +2554,13 @@ typing_extensions = ">=4.0.0" [[package]] name = "llama-cloud" -version = "0.1.10" +version = "0.1.11" description = "" optional = false python-versions = "<4,>=3.8" files = [ - {file = "llama_cloud-0.1.10-py3-none-any.whl", hash = "sha256:d91198ad92ea6c3a25757e5d6cb565b4bd6db385dc4fa596a725c0fb81a68f4e"}, - {file = "llama_cloud-0.1.10.tar.gz", hash = "sha256:56ffe8f2910c2047dd4eb1b13da31ee5f67321a000794eee559e0b56954d2f76"}, + {file = "llama_cloud-0.1.11-py3-none-any.whl", hash = "sha256:b703765d03783a5a0fc57a52adc9892f8b91b0c19bbecb85a54ad4e813342951"}, + {file = "llama_cloud-0.1.11.tar.gz", hash = "sha256:d4be5b48659fd9fe1698727be257269a22d7f2733a2ed11bce7065768eb94cbe"}, ] [package.dependencies] @@ -2569,19 +2570,19 @@ pydantic = ">=1.10" [[package]] name = "llama-index" -version = "0.12.13" +version = "0.12.14" description = "Interface between LLMs and your data" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "llama_index-0.12.13-py3-none-any.whl", hash = "sha256:0b285aa451ced6bd8da40df99068ac96badf8b5725c4edc29f2bce4da2ffd8bc"}, - {file = "llama_index-0.12.13.tar.gz", hash = "sha256:1e39a397dcc51dabe280c121fd8d5451a6a84595233a8b26caa54d9b7ecf9ffc"}, + {file = "llama_index-0.12.14-py3-none-any.whl", hash = "sha256:cafbac9f08f1f7293169bfd3c75545db3b761742ea829ba6940c3f2c3b1c2d26"}, + {file = "llama_index-0.12.14.tar.gz", hash = "sha256:aa74315b32e93a77e285519459d77b98be7db9ae4c5aa64aac2c54cc919c838f"}, ] [package.dependencies] llama-index-agent-openai = ">=0.4.0,<0.5.0" llama-index-cli = ">=0.4.0,<0.5.0" -llama-index-core = ">=0.12.13,<0.13.0" +llama-index-core = ">=0.12.14,<0.13.0" llama-index-embeddings-openai = ">=0.3.0,<0.4.0" llama-index-indices-managed-llama-cloud = ">=0.4.0" llama-index-llms-openai = ">=0.3.0,<0.4.0" @@ -2594,13 +2595,13 @@ nltk = ">3.8.1" [[package]] name = "llama-index-agent-openai" -version = "0.4.2" +version = "0.4.3" description = "llama-index agent openai integration" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "llama_index_agent_openai-0.4.2-py3-none-any.whl", hash = "sha256:e100b8a743b11fef373b5be31be590b929950a4d7fd9d158b5f014dd8fd7976e"}, - {file = 
"llama_index_agent_openai-0.4.2.tar.gz", hash = "sha256:0f8aeb091fc834b2667a46ad2417fc8601bf1c08ccfd1a3d15ede90a30eb1a29"}, + {file = "llama_index_agent_openai-0.4.3-py3-none-any.whl", hash = "sha256:5d1fbb6831113e609296e457b0a4d1c08c9267acca219eb78cb702bd76a0744d"}, + {file = "llama_index_agent_openai-0.4.3.tar.gz", hash = "sha256:ff1f4a13ba417cb4b9cfbc2ffa9f162bdbdda9b87d6645d512cbde2061f55412"}, ] [package.dependencies] @@ -2626,13 +2627,13 @@ llama-index-llms-openai = ">=0.3.0,<0.4.0" [[package]] name = "llama-index-core" -version = "0.12.13" +version = "0.12.14" description = "Interface between LLMs and your data" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "llama_index_core-0.12.13-py3-none-any.whl", hash = "sha256:9708bb594bbddffd6ff0767242e49d8978d1ba60a2e62e071d9d123ad2f17e6f"}, - {file = "llama_index_core-0.12.13.tar.gz", hash = "sha256:77af0161246ce1de38efc17cb6438dfff9e9558af00bcfac7dd4d0b7325efa4b"}, + {file = "llama_index_core-0.12.14-py3-none-any.whl", hash = "sha256:6fdb30e3fadf98e7df75f9db5d06f6a7f8503ca545a71e048d786ff88012bd50"}, + {file = "llama_index_core-0.12.14.tar.gz", hash = "sha256:378bbf5bf4d1a8c692d3a980c1a6ed3be7a9afb676a4960429dea15f62d06cd3"}, ] [package.dependencies] @@ -3319,13 +3320,13 @@ files = [ [[package]] name = "openai" -version = "1.60.0" +version = "1.60.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.60.0-py3-none-any.whl", hash = "sha256:df06c43be8018274980ac363da07d4b417bd835ead1c66e14396f6f15a0d5dda"}, - {file = "openai-1.60.0.tar.gz", hash = "sha256:7fa536cd4b644718645b874d2706e36dbbef38b327e42ca0623275da347ee1a9"}, + {file = "openai-1.60.2-py3-none-any.whl", hash = "sha256:993bd11b96900b9098179c728026f016b4982ded7ee30dfcf4555eab1171fff9"}, + {file = "openai-1.60.2.tar.gz", hash = "sha256:a8f843e10f2855713007f491d96afb2694b11b5e02cb97c7d01a0be60bc5bb51"}, ] [package.dependencies] @@ -3790,13 +3791,13 @@ virtualenv = ">=20.10.0" [[package]] name = "prettytable" -version = "3.12.0" +version = "3.13.0" description = "A simple Python library for easily displaying tabular data in a visually appealing ASCII table format" optional = false python-versions = ">=3.9" files = [ - {file = "prettytable-3.12.0-py3-none-any.whl", hash = "sha256:77ca0ad1c435b6e363d7e8623d7cc4fcf2cf15513bf77a1c1b2e814930ac57cc"}, - {file = "prettytable-3.12.0.tar.gz", hash = "sha256:f04b3e1ba35747ac86e96ec33e3bb9748ce08e254dc2a1c6253945901beec804"}, + {file = "prettytable-3.13.0-py3-none-any.whl", hash = "sha256:d4f5817a248b77ddaa25b27007566c0a6a064308d991516b61b436ffdbb4f8e9"}, + {file = "prettytable-3.13.0.tar.gz", hash = "sha256:30e1a097a7acb075b5c488ffe01195349b37009c2d43ca7fa8b5f6a61daace5b"}, ] [package.dependencies] @@ -3973,6 +3974,7 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, + {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", 
hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -4032,6 +4034,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -4352,13 +4355,13 @@ tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] [[package]] name = "pypdf" -version = "5.1.0" +version = "5.2.0" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" optional = false python-versions = ">=3.8" files = [ - {file = "pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc"}, - {file = "pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740"}, + {file = "pypdf-5.2.0-py3-none-any.whl", hash = "sha256:d107962ec45e65e3bd10c1d9242bdbbedaa38193c9e3a6617bd6d996e5747b19"}, + {file = "pypdf-5.2.0.tar.gz", hash = "sha256:7c38e68420f038f2c4998fd9d6717b6db4f6cef1642e9cf384d519c9cf094663"}, ] [package.dependencies] @@ -4384,13 +4387,13 @@ files = [ [[package]] name = "pyright" -version = "1.1.392.post0" +version = "1.1.393" description = "Command line wrapper for pyright" optional = true python-versions = ">=3.7" files = [ - {file = "pyright-1.1.392.post0-py3-none-any.whl", hash = "sha256:252f84458a46fa2f0fd4e2f91fc74f50b9ca52c757062e93f6c250c0d8329eb2"}, - {file = "pyright-1.1.392.post0.tar.gz", hash = "sha256:3b7f88de74a28dcfa90c7d90c782b6569a48c2be5f9d4add38472bdaac247ebd"}, + {file = "pyright-1.1.393-py3-none-any.whl", hash = "sha256:8320629bb7a44ca90944ba599390162bf59307f3d9fb6e27da3b7011b8c17ae5"}, + {file = "pyright-1.1.393.tar.gz", hash = "sha256:aeeb7ff4e0364775ef416a80111613f91a05c8e01e58ecfefc370ca0db7aed9c"}, ] [package.dependencies] @@ -4849,13 +4852,13 @@ prompt_toolkit = ">=2.0,<4.0" [[package]] name = "referencing" -version = "0.36.1" +version = "0.36.2" description = "JSON Referencing + Python" optional = false python-versions = ">=3.9" files = [ - {file = "referencing-0.36.1-py3-none-any.whl", hash = "sha256:363d9c65f080d0d70bc41c721dce3c7f3e77fc09f269cd5c8813da18069a6794"}, - {file = "referencing-0.36.1.tar.gz", hash = "sha256:ca2e6492769e3602957e9b831b94211599d2aade9477f5d44110d2530cf9aade"}, + {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, + {file = "referencing-0.36.2.tar.gz", hash = 
"sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, ] [package.dependencies] @@ -5148,13 +5151,13 @@ asn1crypto = ">=1.5.1" [[package]] name = "semver" -version = "3.0.2" +version = "3.0.4" description = "Python helper for Semantic Versioning (https://semver.org)" optional = false python-versions = ">=3.7" files = [ - {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, - {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, + {file = "semver-3.0.4-py3-none-any.whl", hash = "sha256:9c824d87ba7f7ab4a1890799cec8596f15c1241cb473404ea1cb0c55e4b04746"}, + {file = "semver-3.0.4.tar.gz", hash = "sha256:afc7d8c584a5ed0a11033af086e8af226a9c0b206f313e0301f8dd7b6b589602"}, ] [[package]] @@ -5447,13 +5450,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "starlette" -version = "0.45.2" +version = "0.45.3" description = "The little ASGI library that shines." optional = false python-versions = ">=3.9" files = [ - {file = "starlette-0.45.2-py3-none-any.whl", hash = "sha256:4daec3356fb0cb1e723a5235e5beaf375d2259af27532958e2d79df549dad9da"}, - {file = "starlette-0.45.2.tar.gz", hash = "sha256:bba1831d15ae5212b22feab2f218bab6ed3cd0fc2dc1d4442443bb1ee52260e0"}, + {file = "starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"}, + {file = "starlette-0.45.3.tar.gz", hash = "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f"}, ] [package.dependencies] @@ -6439,4 +6442,4 @@ tests = ["wikipedia"] [metadata] lock-version = "2.0" python-versions = "<3.14,>=3.10" -content-hash = "58f66b702bd791fcf73f48fa59a1bb0930370832427c1660ebbc81b9c58d1123" +content-hash = "36eb749e2733dad52b29f8032aa0d3808b82093cb5c2d1bdbfa706688781f746" diff --git a/pyproject.toml b/pyproject.toml index ccb16c6501..064cac0e12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "letta" -version = "0.6.16" +version = "0.6.17" packages = [ {include = "letta"}, ] @@ -59,8 +59,8 @@ nltk = "^3.8.1" jinja2 = "^3.1.5" locust = {version = "^2.31.5", optional = true} wikipedia = {version = "^1.4.0", optional = true} -composio-langchain = "^0.6.15" -composio-core = "^0.6.15" +composio-langchain = "^0.6.19" +composio-core = "^0.6.19" alembic = "^1.13.3" pyhumps = "^3.8.0" psycopg2 = {version = "^2.9.10", optional = true} @@ -77,7 +77,7 @@ llama-index = "^0.12.2" llama-index-embeddings-openai = "^0.3.1" e2b-code-interpreter = {version = "^1.0.3", optional = true} anthropic = "^0.43.0" -letta_client = "^0.1.16" +letta_client = "^0.1.23" openai = "^1.60.0" colorama = "^0.4.6" diff --git a/tests/integration_test_chat_completions.py b/tests/integration_test_chat_completions.py new file mode 100644 index 0000000000..34b133a49d --- /dev/null +++ b/tests/integration_test_chat_completions.py @@ -0,0 +1,105 @@ +import os +import threading +import time +import uuid + +import pytest +from dotenv import load_dotenv +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk + +from letta import RESTClient, create_client +from letta.client.streaming import _sse_post +from letta.schemas.agent import AgentState +from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.enums import MessageStreamStatus +from letta.schemas.llm_config import LLMConfig +from letta.schemas.openai.chat_completion_request import 
ChatCompletionRequest, UserMessage +from letta.schemas.usage import LettaUsageStatistics + + +def run_server(): + load_dotenv() + + # _reset_config() + + from letta.server.rest_api.app import start_server + + print("Starting server...") + start_server(debug=True) + + +@pytest.fixture( + scope="module", +) +def client(): + # get URL from enviornment + server_url = os.getenv("LETTA_SERVER_URL") + if server_url is None: + # run server in thread + server_url = "http://localhost:8283" + print("Starting server thread") + thread = threading.Thread(target=run_server, daemon=True) + thread.start() + time.sleep(5) + print("Running client tests with server:", server_url) + # create user via admin client + client = create_client(base_url=server_url, token=None) # This yields control back to the test function + client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini")) + client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai")) + yield client + + +# Fixture for test agent +@pytest.fixture(scope="module") +def agent_state(client: RESTClient): + agent_state = client.create_agent(name=f"test_client_{str(uuid.uuid4())}") + yield agent_state + + # delete agent + client.delete_agent(agent_state.id) + + +def test_voice_streaming(mock_e2b_api_key_none, client: RESTClient, agent_state: AgentState): + """ + Test voice streaming for chat completions using the streaming API. + + This test ensures the SSE (Server-Sent Events) response from the voice streaming endpoint + adheres to the expected structure and contains valid data for each type of chunk. + """ + + # Prepare the chat completion request with streaming enabled + request = ChatCompletionRequest( + model="gpt-4o-mini", + messages=[UserMessage(content="Tell me something interesting about bananas.")], + user=agent_state.id, + stream=True, + ) + + # Perform a POST request to the voice/chat/completions endpoint and collect the streaming response + response = _sse_post( + f"{client.base_url}/openai/{client.api_prefix}/chat/completions", request.model_dump(exclude_none=True), client.headers + ) + + # Convert the streaming response into a list of chunks for processing + chunks = list(response) + + for idx, chunk in enumerate(chunks): + if isinstance(chunk, ChatCompletionChunk): + # Assert that the chunk has at least one choice (a response from the model) + assert len(chunk.choices) > 0, "Each ChatCompletionChunk should have at least one choice." + + elif isinstance(chunk, LettaUsageStatistics): + # Assert that the usage statistics contain valid token counts + assert chunk.completion_tokens > 0, "Completion tokens should be greater than 0 in LettaUsageStatistics." + assert chunk.prompt_tokens > 0, "Prompt tokens should be greater than 0 in LettaUsageStatistics." + assert chunk.total_tokens > 0, "Total tokens should be greater than 0 in LettaUsageStatistics." + assert chunk.step_count == 1, "Step count in LettaUsageStatistics should always be 1 for a single request." + + elif isinstance(chunk, MessageStreamStatus): + # Assert that the stream ends with a 'done' status + assert chunk == MessageStreamStatus.done, "The last chunk should indicate the stream has completed." + assert idx == len(chunks) - 1, "The 'done' status must be the last chunk in the stream." 
+ + else: + # Fail the test if an unexpected chunk type is encountered + pytest.fail(f"Unexpected chunk type: {chunk}", pytrace=True) diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py index 8736825b1f..92c929f97c 100644 --- a/tests/test_base_functions.py +++ b/tests/test_base_functions.py @@ -1,6 +1,4 @@ import json -import secrets -import string import pytest @@ -9,30 +7,33 @@ from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.letta_message import ToolReturnMessage from letta.schemas.llm_config import LLMConfig +from letta.schemas.memory import ChatMemory from tests.helpers.utils import retry_until_success +from tests.utils import wait_for_incoming_message -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def client(): client = create_client() - client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini")) + client.set_default_llm_config(LLMConfig.default_config("gpt-4o")) client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai")) yield client -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def agent_obj(client: LocalClient): """Create a test agent that we can call functions on""" - agent_state = client.create_agent(include_multi_agent_tools=True) + send_message_to_agent_and_wait_for_reply_tool_id = client.get_tool_id(name="send_message_to_agent_and_wait_for_reply") + agent_state = client.create_agent(tool_ids=[send_message_to_agent_and_wait_for_reply_tool_id]) agent_obj = client.server.load_agent(agent_id=agent_state.id, actor=client.user) yield agent_obj - client.delete_agent(agent_obj.agent_state.id) + # client.delete_agent(agent_obj.agent_state.id) -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def other_agent_obj(client: LocalClient): """Create another test agent that we can call functions on""" agent_state = client.create_agent(include_multi_agent_tools=False) @@ -119,18 +120,18 @@ def test_recall(client, agent_obj): # This test is nondeterministic, so we retry until we get the perfect behavior from the LLM @retry_until_success(max_attempts=5, sleep_time_seconds=2) def test_send_message_to_agent(client, agent_obj, other_agent_obj): - long_random_string = "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10)) + secret_word = "banana" # Encourage the agent to send a message to the other agent_obj with the secret string client.send_message( agent_id=agent_obj.agent_state.id, role="user", - message=f"Use your tool to send a message to another agent with id {other_agent_obj.agent_state.id} with the secret password={long_random_string}", + message=f"Use your tool to send a message to another agent with id {other_agent_obj.agent_state.id} to share the secret word: {secret_word}!", ) # Conversation search the other agent - result = base_functions.conversation_search(other_agent_obj, long_random_string) - assert long_random_string in result + result = base_functions.conversation_search(other_agent_obj, secret_word) + assert secret_word in result # Search the sender agent for the response from another agent in_context_messages = agent_obj.agent_manager.get_in_context_messages(agent_id=agent_obj.agent_state.id, actor=agent_obj.user) @@ -144,7 +145,7 @@ def test_send_message_to_agent(client, agent_obj, other_agent_obj): print(f"In context messages of the sender agent (without system):\n\n{"\n".join([m.text for m in in_context_messages[1:]])}") if not found: - pytest.fail(f"Was not able to find an instance of the target 
snippet: {target_snippet}") + raise Exception(f"Was not able to find an instance of the target snippet: {target_snippet}") # Test that the agent can still receive messages fine response = client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="So what did the other agent say?") @@ -161,10 +162,11 @@ def test_send_message_to_agents_with_tags(client): for agent in prev_worker_agents: client.delete_agent(agent.id) - long_random_string = "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(10)) + secret_word = "banana" # Create "manager" agent - manager_agent_state = client.create_agent(include_multi_agent_tools=True) + send_message_to_agents_matching_all_tags_tool_id = client.get_tool_id(name="send_message_to_agents_matching_all_tags") + manager_agent_state = client.create_agent(tool_ids=[send_message_to_agents_matching_all_tags_tool_id]) manager_agent = client.server.load_agent(agent_id=manager_agent_state.id, actor=client.user) # Create 3 worker agents @@ -187,7 +189,7 @@ def test_send_message_to_agents_with_tags(client): response = client.send_message( agent_id=manager_agent.agent_state.id, role="user", - message=f"Send a message to all agents with tags {worker_tags} informing them of the secret password={long_random_string}", + message=f"Send a message to all agents with tags {worker_tags} informing them of the secret word: {secret_word}!", ) for m in response.messages: @@ -201,8 +203,8 @@ def test_send_message_to_agents_with_tags(client): # Conversation search the worker agents for agent in worker_agents: - result = base_functions.conversation_search(agent, long_random_string) - assert long_random_string in result + result = base_functions.conversation_search(agent, secret_word) + assert secret_word in result # Test that the agent can still receive messages fine response = client.send_message(agent_id=manager_agent.agent_state.id, role="user", message="So what did the other agents say?") @@ -212,3 +214,56 @@ def test_send_message_to_agents_with_tags(client): client.delete_agent(manager_agent_state.id) for agent in worker_agents: client.delete_agent(agent.agent_state.id) + + +@retry_until_success(max_attempts=5, sleep_time_seconds=2) +def test_agents_async_simple(client): + """ + Test two agents with multi-agent tools sending messages back and forth to count to 5. + The chain is started by prompting one of the agents. 
+ """ + # Cleanup from potentially failed previous runs + existing_agents = client.server.agent_manager.list_agents(client.user) + for agent in existing_agents: + client.delete_agent(agent.id) + + # Create two agents with multi-agent tools + send_message_to_agent_async_tool_id = client.get_tool_id(name="send_message_to_agent_async") + memory_a = ChatMemory( + human="Chad - I'm interested in hearing poem.", + persona="You are an AI agent that can communicate with your agent buddy using `send_message_to_agent_async`, who has some great poem ideas (so I've heard).", + ) + charles_state = client.create_agent(name="charles", memory=memory_a, tool_ids=[send_message_to_agent_async_tool_id]) + charles = client.server.load_agent(agent_id=charles_state.id, actor=client.user) + + memory_b = ChatMemory( + human="No human - you are to only communicate with the other AI agent.", + persona="You are an AI agent that can communicate with your agent buddy using `send_message_to_agent_async`, who is interested in great poem ideas.", + ) + sarah_state = client.create_agent(name="sarah", memory=memory_b, tool_ids=[send_message_to_agent_async_tool_id]) + + # Start the count chain with Agent1 + initial_prompt = f"I want you to talk to the other agent with ID {sarah_state.id} using `send_message_to_agent_async`. Specifically, I want you to ask him for a poem idea, and then craft a poem for me." + client.send_message( + agent_id=charles.agent_state.id, + role="user", + message=initial_prompt, + ) + + found_in_charles = wait_for_incoming_message( + client=client, + agent_id=charles_state.id, + substring="[Incoming message from agent with ID", + max_wait_seconds=10, + sleep_interval=0.5, + ) + assert found_in_charles, "Charles never received the system message from Sarah (timed out)." + + found_in_sarah = wait_for_incoming_message( + client=client, + agent_id=sarah_state.id, + substring="[Incoming message from agent with ID", + max_wait_seconds=10, + sleep_interval=0.5, + ) + assert found_in_sarah, "Sarah never received the system message from Charles (timed out)." 
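Note on the async multi-agent test above: it polls for delivery via a `wait_for_incoming_message` helper imported from `tests.utils`, which is not shown in this patch. The snippet below is an illustrative sketch only (not the repository's implementation, and the name is deliberately suffixed `_sketch`); it assumes the same accessors that appear elsewhere in this diff, namely `client.server.agent_manager`, `get_in_context_messages(agent_id=..., actor=...)`, and the `.text` field on returned messages.

import time

def wait_for_incoming_message_sketch(client, agent_id, substring,
                                      max_wait_seconds=10, sleep_interval=0.5):
    """Illustrative only: poll an agent's in-context messages until `substring`
    shows up, or give up after `max_wait_seconds`."""
    deadline = time.time() + max_wait_seconds
    while time.time() < deadline:
        # Same accessor pattern used by the sender-side assertions in
        # test_send_message_to_agent above.
        messages = client.server.agent_manager.get_in_context_messages(
            agent_id=agent_id, actor=client.user
        )
        if any(m.text and substring in m.text for m in messages):
            return True
        time.sleep(sleep_interval)
    return False

In the test, this would be called exactly as the real helper is: `wait_for_incoming_message_sketch(client, charles_state.id, "[Incoming message from agent with ID", 10, 0.5)`.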
diff --git a/tests/test_managers.py b/tests/test_managers.py index d50808432d..16d7a2d0e3 100644 --- a/tests/test_managers.py +++ b/tests/test_managers.py @@ -44,8 +44,7 @@ from letta.schemas.environment_variables import SandboxEnvironmentVariableCreate, SandboxEnvironmentVariableUpdate from letta.schemas.file import FileMetadata as PydanticFileMetadata from letta.schemas.job import Job as PydanticJob -from letta.schemas.job import JobUpdate -from letta.schemas.letta_request import LettaRequestConfig +from letta.schemas.job import JobUpdate, LettaRequestConfig from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage from letta.schemas.message import MessageCreate, MessageUpdate @@ -59,7 +58,6 @@ from letta.schemas.tool import Tool as PydanticTool from letta.schemas.tool import ToolCreate, ToolUpdate from letta.schemas.tool_rule import InitToolRule -from letta.schemas.usage import LettaUsageStatistics from letta.schemas.user import User as PydanticUser from letta.schemas.user import UserUpdate from letta.server.server import SyncServer @@ -3019,13 +3017,14 @@ def test_get_run_messages(server: SyncServer, default_user: PydanticUser, sarah_ PydanticMessage( organization_id=default_user.organization_id, agent_id=sarah_agent.id, - role=MessageRole.user if i % 2 == 0 else MessageRole.assistant, - text=f"Test message {i}", + role=MessageRole.tool if i % 2 == 0 else MessageRole.assistant, + text=f"Test message {i}" if i % 2 == 1 else '{"status": "OK"}', tool_calls=( - [{"type": "function", "id": f"call_{i}", "function": {"name": "custom_tool", "arguments": '{"custom_arg": "test"}'}}] + [{"type": "function", "id": f"call_{i//2}", "function": {"name": "custom_tool", "arguments": '{"custom_arg": "test"}'}}] if i % 2 == 1 else None ), + tool_call_id=f"call_{i//2}" if i % 2 == 0 else None, ) for i in range(4) ] @@ -3050,6 +3049,58 @@ def test_get_run_messages(server: SyncServer, default_user: PydanticUser, sarah_ assert msg.tool_call.name == "custom_tool" +def test_get_run_messages(server: SyncServer, default_user: PydanticUser, sarah_agent): + """Test getting messages for a run with request config.""" + # Create a run with custom request config + run = server.job_manager.create_job( + pydantic_job=PydanticRun( + user_id=default_user.id, + status=JobStatus.created, + request_config=LettaRequestConfig( + use_assistant_message=True, assistant_message_tool_name="custom_tool", assistant_message_tool_kwarg="custom_arg" + ), + ), + actor=default_user, + ) + + # Add some messages + messages = [ + PydanticMessage( + organization_id=default_user.organization_id, + agent_id=sarah_agent.id, + role=MessageRole.tool if i % 2 == 0 else MessageRole.assistant, + text=f"Test message {i}" if i % 2 == 1 else '{"status": "OK"}', + tool_calls=( + [{"type": "function", "id": f"call_{i//2}", "function": {"name": "custom_tool", "arguments": '{"custom_arg": "test"}'}}] + if i % 2 == 1 + else None + ), + tool_call_id=f"call_{i//2}" if i % 2 == 0 else None, + ) + for i in range(4) + ] + + for msg in messages: + created_msg = server.message_manager.create_message(msg, actor=default_user) + server.job_manager.add_message_to_job(job_id=run.id, message_id=created_msg.id, actor=default_user) + + # Get messages and verify they're converted correctly + result = server.job_manager.get_run_messages(run_id=run.id, actor=default_user) + + # Verify correct number of messages. 
Assistant messages should be parsed + assert len(result) == 4 + + # Verify assistant messages are parsed according to request config + assistant_messages = [msg for msg in result if msg.message_type == "assistant_message"] + reasoning_messages = [msg for msg in result if msg.message_type == "reasoning_message"] + assert len(assistant_messages) == 2 + assert len(reasoning_messages) == 2 + for msg in assistant_messages: + assert msg.content == "test" + for msg in reasoning_messages: + assert "Test message" in msg.reasoning + + # ====================================================================================================================== # JobManager Tests - Usage Statistics # ====================================================================================================================== diff --git a/tests/test_optimistic_json_parser.py b/tests/test_optimistic_json_parser.py new file mode 100644 index 0000000000..4f1888544d --- /dev/null +++ b/tests/test_optimistic_json_parser.py @@ -0,0 +1,248 @@ +import json +from unittest.mock import patch + +import pytest + +from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser + + +@pytest.fixture +def strict_parser(): + """Provides a fresh OptimisticJSONParser instance in strict mode.""" + return OptimisticJSONParser(strict=True) + + +@pytest.fixture +def lenient_parser(): + """Provides a fresh OptimisticJSONParser instance in non-strict mode.""" + return OptimisticJSONParser(strict=False) + + +def test_parse_empty_input(strict_parser): + """ + Test parsing an empty string. Should fall back to parsing "{}". + """ + result = strict_parser.parse("") + assert result == {}, "Empty input should parse as an empty dict." + + +def test_parse_valid_json(strict_parser): + """ + Test parsing a valid JSON string using the standard json.loads logic. + """ + input_str = '{"name": "John", "age": 30}' + result = strict_parser.parse(input_str) + assert result == {"name": "John", "age": 30}, "Should parse valid JSON correctly." + + +def test_parse_valid_json_array(strict_parser): + """ + Test parsing a valid JSON array. + """ + input_str = '[1, 2, 3, "four"]' + result = strict_parser.parse(input_str) + assert result == [1, 2, 3, "four"], "Should parse valid JSON array correctly." + + +def test_parse_partial_json_object(strict_parser): + """ + Test parsing a JSON object with extra trailing characters. + The extra characters should trigger on_extra_token. + """ + input_str = '{"key": "value"} trailing' + with patch.object(strict_parser, "on_extra_token") as mock_callback: + result = strict_parser.parse(input_str) + + assert result == {"key": "value"}, "Should parse the JSON part properly." + assert strict_parser.last_parse_reminding.strip() == "trailing", "The leftover reminding should be 'trailing'." + mock_callback.assert_called_once() + + +def test_parse_partial_json_array(strict_parser): + """ + Test parsing a JSON array with extra tokens. + """ + input_str = "[1, 2, 3] extra_tokens" + result = strict_parser.parse(input_str) + assert result == [1, 2, 3], "Should parse array portion properly." + assert strict_parser.last_parse_reminding.strip() == "extra_tokens", "The leftover reminding should capture extra tokens." + + +def test_parse_number_cases(strict_parser): + """ + Test various number formats. + """ + # We'll parse them individually to ensure the fallback parser handles them. + test_cases = { + "123": 123, + "-42": -42, + "3.14": 3.14, + "-0.001": -0.001, + "10.": 10, # This should convert to int in our parser. 
+ ".5": 0.5 if not strict_parser.strict else ".5", + } + + for num_str, expected in test_cases.items(): + parsed = strict_parser.parse(num_str) + if num_str == ".5" and strict_parser.strict: + # Strict mode won't parse ".5" directly as a valid float by default + # Our current logic may end up raising or partial-parsing. + # Adjust as necessary based on your actual parser's behavior. + assert parsed == ".5" or parsed == 0.5, "Strict handling of '.5' can vary." + else: + assert parsed == expected, f"Number parsing failed for {num_str}" + + +def test_parse_boolean_true(strict_parser): + assert strict_parser.parse("true") is True, "Should parse 'true'." + # Check leftover + assert strict_parser.last_parse_reminding == "", "No extra tokens expected." + + +def test_parse_boolean_false(strict_parser): + assert strict_parser.parse("false") is False, "Should parse 'false'." + + +def test_parse_null(strict_parser): + assert strict_parser.parse("null") is None, "Should parse 'null'." + + +@pytest.mark.parametrize("invalid_boolean", ["tru", "fa", "fal", "True", "False"]) +def test_parse_invalid_booleans(strict_parser, invalid_boolean): + """ + Test some invalid booleans. The parser tries to parse them as partial if possible. + If it fails, it may raise an exception or parse partially based on the code. + """ + try: + result = strict_parser.parse(invalid_boolean) + # If it doesn't raise, it might parse partially or incorrectly. + # Check leftover or the returned data. + # Adjust your assertions based on actual parser behavior. + assert result in [True, False, invalid_boolean], f"Unexpected parse result for {invalid_boolean}: {result}" + except json.JSONDecodeError: + # This is also a valid outcome for truly invalid strings in strict mode. + pass + + +def test_parse_string_with_escapes(strict_parser): + """ + Test a string containing escaped quotes. + """ + input_str = r'"This is a \"test\" string"' + result = strict_parser.parse(input_str) + assert result == 'This is a "test" string', "String with escaped quotes should parse correctly." + + +def test_parse_incomplete_string_strict(strict_parser): + """ + Test how a strict parser handles an incomplete string. + """ + input_str = '"Unfinished string with no end' + try: + strict_parser.parse(input_str) + pytest.fail("Expected an error or partial parse with leftover tokens in strict mode.") + except json.JSONDecodeError: + pass # Strict mode might raise + + +def test_parse_incomplete_string_lenient(lenient_parser): + """ + In non-strict mode, incomplete strings may be returned as-is. + """ + input_str = '"Unfinished string with no end' + result = lenient_parser.parse(input_str) + assert result == "Unfinished string with no end", "Lenient mode should return the incomplete string without quotes." + + +def test_parse_incomplete_number_strict(strict_parser): + """ + Test how a strict parser handles an incomplete number, like '-' or '.'. + In strict mode, the parser now raises JSONDecodeError rather than + returning the partial string. + """ + input_str = "-" + with pytest.raises(json.JSONDecodeError): + strict_parser.parse(input_str) + + +def test_object_with_missing_colon(strict_parser): + """ + Test parsing an object missing a colon. Should raise or partially parse. 
+ """ + input_str = '{"key" "value"}' + try: + strict_parser.parse(input_str) + pytest.fail("Parser should raise or handle error with missing colon.") + except json.JSONDecodeError: + pass + + +def test_object_with_missing_value(strict_parser): + """ + Test parsing an object with a key but no value before a comma or brace. + """ + input_str = '{"key":}' + # Depending on parser logic, "key" might map to None or raise an error. + result = strict_parser.parse(input_str) + # Expect partial parse: {'key': None} + assert result == {"key": None}, "Key without value should map to None." + + +def test_array_with_trailing_comma(strict_parser): + """ + Test array that might have a trailing comma before closing. + """ + input_str = "[1, 2, 3, ]" + result = strict_parser.parse(input_str) + # The parser does not explicitly handle trailing commas in strict JSON. + # But the fallback logic may allow partial parse. Adjust assertions accordingly. + assert result == [1, 2, 3], "Trailing comma should be handled or partially parsed." + + +def test_callback_invocation(strict_parser, capsys): + """ + Verify that on_extra_token callback is invoked and prints expected content. + """ + input_str = '{"a":1} leftover' + strict_parser.parse(input_str) + captured = capsys.readouterr().out + assert "Parsed JSON with extra tokens:" in captured, "Callback default_on_extra_token should print a message." + + +def test_unknown_token(strict_parser): + """ + Test parser behavior when encountering an unknown first character. + Should raise JSONDecodeError in strict mode. + """ + input_str = "@invalid" + with pytest.raises(json.JSONDecodeError): + strict_parser.parse(input_str) + + +def test_array_nested_objects(lenient_parser): + """ + Test parsing a complex structure with nested arrays/objects. + """ + input_str = '[ {"a":1}, {"b": [2,3]}, 4, "string"] leftover' + result = lenient_parser.parse(input_str) + expected = [{"a": 1}, {"b": [2, 3]}, 4, "string"] + assert result == expected, "Should parse nested arrays/objects correctly." + assert lenient_parser.last_parse_reminding.strip() == "leftover" + + +def test_multiple_parse_calls(strict_parser): + """ + Test calling parse() multiple times to ensure leftover is reset properly. 
+ """ + input_1 = '{"x":1} trailing1' + input_2 = "[2,3] trailing2" + + # First parse + result_1 = strict_parser.parse(input_1) + assert result_1 == {"x": 1} + assert strict_parser.last_parse_reminding.strip() == "trailing1" + + # Second parse + result_2 = strict_parser.parse(input_2) + assert result_2 == [2, 3] + assert strict_parser.last_parse_reminding.strip() == "trailing2" diff --git a/tests/test_sdk_client.py b/tests/test_sdk_client.py index 917cdafd90..5d56904156 100644 --- a/tests/test_sdk_client.py +++ b/tests/test_sdk_client.py @@ -10,7 +10,7 @@ from letta_client import Letta as LettaSDKClient from letta_client import MessageCreate from letta_client.core import ApiError -from letta_client.types import AgentState, LettaRequestConfig, ToolCallMessage, ToolReturnMessage +from letta_client.types import AgentState, ToolReturnMessage # Constants SERVER_PORT = 8283 @@ -48,7 +48,7 @@ def agent(client: LettaSDKClient): value="username: sarah", ), ], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ) yield agent_state @@ -74,7 +74,7 @@ def test_shared_blocks(client: LettaSDKClient): ), ], block_ids=[block.id], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ) agent_state2 = client.agents.create( @@ -86,7 +86,7 @@ def test_shared_blocks(client: LettaSDKClient): ), ], block_ids=[block.id], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ) @@ -138,7 +138,7 @@ def test_add_and_manage_tags_for_agent(client: LettaSDKClient): value="username: sarah", ), ], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ) assert len(agent.tags) == 0 @@ -181,6 +181,7 @@ def test_agent_tags(client: LettaSDKClient): all_agents = client.agents.list() for agent in all_agents: client.agents.delete(agent.id) + # Create multiple agents with different tags agent1 = client.agents.create( memory_blocks=[ @@ -189,7 +190,7 @@ def test_agent_tags(client: LettaSDKClient): value="username: sarah", ), ], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", tags=["test", "agent1", "production"], ) @@ -201,7 +202,7 @@ def test_agent_tags(client: LettaSDKClient): value="username: sarah", ), ], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", tags=["test", "agent2", "development"], ) @@ -213,7 +214,7 @@ def test_agent_tags(client: LettaSDKClient): value="username: sarah", ), ], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", tags=["test", "agent3", "production"], ) @@ -392,7 +393,7 @@ def big_return(): content="call the big_return function", ), ], - config=LettaRequestConfig(use_assistant_message=False), + use_assistant_message=False, ) response_message = None @@ -428,7 +429,7 @@ def always_error(): content="call the always_error function", ), ], - config=LettaRequestConfig(use_assistant_message=False), + use_assistant_message=False, ) response_message = None @@ -494,7 +495,7 @@ def test_send_message_async(client: LettaSDKClient, agent: AgentState): content=test_message, ), ], - config=LettaRequestConfig(use_assistant_message=False), + use_assistant_message=False, ) assert run.id is not None assert run.status == "created" @@ -521,9 +522,9 @@ def test_send_message_async(client: LettaSDKClient, agent: AgentState): tool_messages = client.runs.list_run_messages(run_id=run.id, role="tool") assert 
len(tool_messages) > 0 - specific_tool_messages = [message for message in client.runs.list_run_messages(run_id=run.id) if isinstance(message, ToolCallMessage)] - assert specific_tool_messages[0].tool_call.name == "send_message" - assert len(specific_tool_messages) > 0 + # specific_tool_messages = [message for message in client.runs.list_run_messages(run_id=run.id) if isinstance(message, ToolCallMessage)] + # assert specific_tool_messages[0].tool_call.name == "send_message" + # assert len(specific_tool_messages) > 0 # Get and verify usage statistics usage = client.runs.retrieve_run_usage(run_id=run.id) @@ -568,7 +569,7 @@ def another_test_tool(): agent = client.agents.create( name=f"test_agent_{str(uuid.uuid4())}", memory_blocks=[offline_persona_block, mindy_block], - model="openai/gpt-4", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", tool_ids=[tool1.id, tool2.id], include_base_tools=False, diff --git a/tests/test_server.py b/tests/test_server.py index fe3dc1af55..ed5a33f5d5 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -26,6 +26,7 @@ from letta.schemas.message import Message from letta.schemas.source import Source as PydanticSource from letta.server.server import SyncServer +from letta.system import unpack_message from .utils import DummyDataConnector @@ -332,7 +333,7 @@ def agent_id(server, user_id, base_tools): name="test_agent", tool_ids=[t.id for t in base_tools], memory_blocks=[], - model="openai/gpt-4o", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ), actor=actor, @@ -353,7 +354,7 @@ def other_agent_id(server, user_id, base_tools): name="test_agent_other", tool_ids=[t.id for t in base_tools], memory_blocks=[], - model="openai/gpt-4o", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ), actor=actor, @@ -552,7 +553,7 @@ def test_delete_agent_same_org(server: SyncServer, org_id: str, user: User): request=CreateAgent( name="nonexistent_tools_agent", memory_blocks=[], - model="openai/gpt-4o", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ), actor=user, @@ -636,6 +637,7 @@ def _test_get_messages_letta_format( limit=1000, reverse=reverse, return_message_object=True, + use_assistant_message=False, ) assert all(isinstance(m, Message) for m in messages) @@ -645,6 +647,7 @@ def _test_get_messages_letta_format( limit=1000, reverse=reverse, return_message_object=False, + use_assistant_message=False, ) assert all(isinstance(m, LettaMessage) for m in letta_messages) @@ -711,7 +714,7 @@ def _test_get_messages_letta_format( elif message.role == MessageRole.user: assert isinstance(letta_message, UserMessage) - assert message.text == letta_message.content + assert unpack_message(message.text) == letta_message.content letta_message_index += 1 elif message.role == MessageRole.system: @@ -914,13 +917,13 @@ def test_memory_rebuild_count(server, user, mock_e2b_api_key_none, base_tools, b # create agent agent_state = server.create_agent( request=CreateAgent( - name="memory_rebuild_test_agent", + name="test_memory_rebuild_count", tool_ids=[t.id for t in base_tools + base_memory_tools], memory_blocks=[ CreateBlock(label="human", value="The human's name is Bob."), CreateBlock(label="persona", value="My name is Alice."), ], - model="openai/gpt-4o", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", ), actor=actor, @@ -952,18 +955,11 @@ def count_system_messages_in_recall() -> Tuple[int, List[LettaMessage]]: num_system_messages, all_messages = 
count_system_messages_in_recall() assert num_system_messages == 1, (num_system_messages, all_messages) - # Assuming core memory append actually ran correctly, at this point there should be 2 messages - server.user_message(user_id=user.id, agent_id=agent_state.id, message="Append 'banana' to your core memory") - - # At this stage, there should be 2 system message inside of recall storage - num_system_messages, all_messages = count_system_messages_in_recall() - assert num_system_messages == 2, (num_system_messages, all_messages) - # Run server.load_agent, and make sure that the number of system messages is still 2 server.load_agent(agent_id=agent_state.id, actor=actor) num_system_messages, all_messages = count_system_messages_in_recall() - assert num_system_messages == 2, (num_system_messages, all_messages) + assert num_system_messages == 1, (num_system_messages, all_messages) finally: # cleanup @@ -1108,7 +1104,7 @@ def test_add_remove_tools_update_agent(server: SyncServer, user_id: str, base_to CreateBlock(label="human", value="The human's name is Bob."), CreateBlock(label="persona", value="My name is Alice."), ], - model="openai/gpt-4o", + model="openai/gpt-4o-mini", embedding="openai/text-embedding-ada-002", include_base_tools=False, ), diff --git a/tests/test_v1_routes.py b/tests/test_v1_routes.py index 4a0d027bc4..8a23254058 100644 --- a/tests/test_v1_routes.py +++ b/tests/test_v1_routes.py @@ -1,8 +1,8 @@ from datetime import datetime -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import MagicMock, Mock import pytest -from composio.client.collections import ActionModel, ActionParametersModel, ActionResponseModel, AppModel +from composio.client.collections import AppModel from fastapi.testclient import TestClient from letta.orm.errors import NoResultFound @@ -98,65 +98,6 @@ def composio_apps(): yield [affinity_app] -@pytest.fixture -def composio_actions(): - yield [ - ActionModel( - name="AFFINITY_GET_ALL_COMPANIES", - display_name="Get all companies", - parameters=ActionParametersModel( - properties={ - "cursor": {"default": None, "description": "Cursor for the next or previous page", "title": "Cursor", "type": "string"}, - "limit": {"default": 100, "description": "Number of items to include in the page", "title": "Limit", "type": "integer"}, - "ids": {"default": None, "description": "Company IDs", "items": {"type": "integer"}, "title": "Ids", "type": "array"}, - "fieldIds": { - "default": None, - "description": "Field IDs for which to return field data", - "items": {"type": "string"}, - "title": "Fieldids", - "type": "array", - }, - "fieldTypes": { - "default": None, - "description": "Field Types for which to return field data", - "items": {"enum": ["enriched", "global", "relationship-intelligence"], "title": "FieldtypesEnm", "type": "string"}, - "title": "Fieldtypes", - "type": "array", - }, - }, - title="GetAllCompaniesRequest", - type="object", - required=None, - ), - response=ActionResponseModel( - properties={ - "data": {"title": "Data", "type": "object"}, - "successful": { - "description": "Whether or not the action execution was successful or not", - "title": "Successful", - "type": "boolean", - }, - "error": { - "anyOf": [{"type": "string"}, {"type": "null"}], - "default": None, - "description": "Error if any occurred during the execution of the action", - "title": "Error", - }, - }, - title="GetAllCompaniesResponse", - type="object", - required=["data", "successful"], - ), - appName="affinity", - appId="affinity", - tags=["companies", "important"], - 
enabled=False, - logo="https://cdn.jsdelivr.net/gh/ComposioHQ/open-logos@master/affinity.jpeg", - description="Affinity Api Allows Paginated Access To Company Info And Custom Fields. Use `Field Ids` Or `Field Types` To Specify Data In A Request. Retrieve Field I Ds/Types Via Get `/V2/Companies/Fields`. Export Permission Needed.", - ) - ] - - def configure_mock_sync_server(mock_sync_server): # Mock sandbox config manager to return a valid API key mock_api_key = Mock() @@ -259,55 +200,6 @@ def test_upsert_base_tools(client, mock_sync_server, add_integers_tool): ) -def test_list_composio_apps(client, mock_sync_server, composio_apps): - configure_mock_sync_server(mock_sync_server) - - mock_sync_server.get_composio_apps.return_value = composio_apps - - response = client.get("/v1/tools/composio/apps") - - assert response.status_code == 200 - assert len(response.json()) == 1 - mock_sync_server.get_composio_apps.assert_called_once() - - -def test_list_composio_actions_by_app(client, mock_sync_server, composio_actions): - configure_mock_sync_server(mock_sync_server) - - mock_sync_server.get_composio_actions_from_app_name.return_value = composio_actions - - response = client.get("/v1/tools/composio/apps/App1/actions") - - assert response.status_code == 200 - assert len(response.json()) == 1 - mock_sync_server.get_composio_actions_from_app_name.assert_called_once_with(composio_app_name="App1", api_key="mock_composio_api_key") - - -def test_add_composio_tool(client, mock_sync_server, add_integers_tool): - configure_mock_sync_server(mock_sync_server) - - # Mock ToolCreate.from_composio to return the expected ToolCreate object - with patch("letta.schemas.tool.ToolCreate.from_composio") as mock_from_composio: - mock_from_composio.return_value = ToolCreate( - source_code=add_integers_tool.source_code, - json_schema=add_integers_tool.json_schema, - ) - - # Mock server behavior - mock_sync_server.tool_manager.create_or_update_composio_tool.return_value = add_integers_tool - - # Perform the request - response = client.post(f"/v1/tools/composio/{add_integers_tool.name}", headers={"user_id": "test_user"}) - - # Assertions - assert response.status_code == 200 - assert response.json()["id"] == add_integers_tool.id - mock_sync_server.tool_manager.create_or_update_composio_tool.assert_called_once() - - # Verify the mocked from_composio method was called - mock_from_composio.assert_called_once_with(action_name=add_integers_tool.name) - - # ====================================================================================================================== # Runs Routes Tests # ====================================================================================================================== diff --git a/tests/utils.py b/tests/utils.py index 19a05a090a..46d83ed753 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ import datetime import os +import time from datetime import datetime from importlib import util from typing import Dict, Iterator, List, Tuple @@ -8,6 +9,7 @@ from letta.config import LettaConfig from letta.data_sources.connectors import DataConnector +from letta.schemas.enums import MessageRole from letta.schemas.file import FileMetadata from letta.settings import TestSettings @@ -145,3 +147,27 @@ def with_qdrant_storage(storage: list[str]): storage.append("qdrant") return storage + + +def wait_for_incoming_message( + client, + agent_id: str, + substring: str = "[Incoming message from agent with ID", + max_wait_seconds: float = 10.0, + sleep_interval: float = 0.5, +) -> bool: + """ + 
Polls for up to `max_wait_seconds` to see if the agent's message list + contains a system message with `substring`. + Returns True if found, otherwise False after timeout. + """ + deadline = time.time() + max_wait_seconds + + while time.time() < deadline: + messages = client.server.message_manager.list_messages_for_agent(agent_id=agent_id) + # Check for the system message containing `substring` + if any(message.role == MessageRole.system and substring in (message.text or "") for message in messages): + return True + time.sleep(sleep_interval) + + return False
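
For context, a minimal usage sketch for the new `wait_for_incoming_message` helper follows. It assumes a client object that exposes `.server` (which the helper's polling call relies on) and two hypothetical agents exchanging a cross-agent message; the test name, fixtures, trigger call, and prompt are illustrative assumptions, not part of this diff.

    from tests.utils import wait_for_incoming_message

    def test_receiver_gets_cross_agent_notification(local_client, sender_agent, receiver_agent):
        # Hypothetical trigger: ask the sender agent to message the receiver agent.
        # The exact send API is an assumption here; any call that causes a
        # cross-agent message to land in the receiver's recall storage works.
        local_client.send_message(
            agent_id=sender_agent.id,
            role="user",
            message=f"Send a greeting to the agent with ID {receiver_agent.id}",
        )

        # Poll the receiver's message list until the system-level notification
        # appears, or give up after the timeout.
        assert wait_for_incoming_message(
            local_client,
            agent_id=receiver_agent.id,
            substring="[Incoming message from agent with ID",
            max_wait_seconds=30.0,
            sleep_interval=0.5,
        ), "Receiver never saw the incoming-message system event"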