diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore index 5b3735122408..4ddd437b5101 100644 --- a/docs/core_docs/.gitignore +++ b/docs/core_docs/.gitignore @@ -138,6 +138,8 @@ docs/how_to/multimodal_inputs.md docs/how_to/multimodal_inputs.mdx docs/how_to/migrate_agent.md docs/how_to/migrate_agent.mdx +docs/how_to/message_history.md +docs/how_to/message_history.mdx docs/how_to/merge_message_runs.md docs/how_to/merge_message_runs.mdx docs/how_to/logprobs.md @@ -198,14 +200,14 @@ docs/how_to/character_text_splitter.md docs/how_to/character_text_splitter.mdx docs/how_to/cancel_execution.md docs/how_to/cancel_execution.mdx +docs/how_to/callbacks_serverless.md +docs/how_to/callbacks_serverless.mdx docs/how_to/callbacks_runtime.md docs/how_to/callbacks_runtime.mdx docs/how_to/callbacks_custom_events.md docs/how_to/callbacks_custom_events.mdx docs/how_to/callbacks_constructor.md docs/how_to/callbacks_constructor.mdx -docs/how_to/callbacks_backgrounding.md -docs/how_to/callbacks_backgrounding.mdx docs/how_to/callbacks_attach.md docs/how_to/callbacks_attach.mdx docs/how_to/binding.md @@ -214,6 +216,12 @@ docs/how_to/assign.md docs/how_to/assign.mdx docs/how_to/agent_executor.md docs/how_to/agent_executor.mdx +docs/versions/migrating_memory/conversation_summary_memory.md +docs/versions/migrating_memory/conversation_summary_memory.mdx +docs/versions/migrating_memory/conversation_buffer_window_memory.md +docs/versions/migrating_memory/conversation_buffer_window_memory.mdx +docs/versions/migrating_memory/chat_history.md +docs/versions/migrating_memory/chat_history.mdx docs/integrations/vectorstores/weaviate.md docs/integrations/vectorstores/weaviate.mdx docs/integrations/vectorstores/upstash.md @@ -252,10 +260,6 @@ docs/integrations/toolkits/sql.md docs/integrations/toolkits/sql.mdx docs/integrations/toolkits/openapi.md docs/integrations/toolkits/openapi.mdx -docs/integrations/stores/in_memory.md -docs/integrations/stores/in_memory.mdx -docs/integrations/stores/file_system.md -docs/integrations/stores/file_system.mdx docs/integrations/text_embedding/togetherai.md docs/integrations/text_embedding/togetherai.mdx docs/integrations/text_embedding/openai.md @@ -278,12 +282,18 @@ docs/integrations/text_embedding/bedrock.md docs/integrations/text_embedding/bedrock.mdx docs/integrations/text_embedding/azure_openai.md docs/integrations/text_embedding/azure_openai.mdx +docs/integrations/stores/in_memory.md +docs/integrations/stores/in_memory.mdx +docs/integrations/stores/file_system.md +docs/integrations/stores/file_system.mdx docs/integrations/retrievers/tavily.md docs/integrations/retrievers/tavily.mdx docs/integrations/retrievers/kendra-retriever.md docs/integrations/retrievers/kendra-retriever.mdx docs/integrations/retrievers/exa.md docs/integrations/retrievers/exa.mdx +docs/integrations/retrievers/bm25.md +docs/integrations/retrievers/bm25.mdx docs/integrations/retrievers/bedrock-knowledge-bases.md docs/integrations/retrievers/bedrock-knowledge-bases.mdx docs/integrations/llms/together.md @@ -304,10 +314,10 @@ docs/integrations/llms/cloudflare_workersai.md docs/integrations/llms/cloudflare_workersai.mdx docs/integrations/llms/bedrock.md docs/integrations/llms/bedrock.mdx -docs/integrations/llms/arcjet.md -docs/integrations/llms/arcjet.mdx docs/integrations/llms/azure.md docs/integrations/llms/azure.mdx +docs/integrations/llms/arcjet.md +docs/integrations/llms/arcjet.mdx docs/integrations/chat/togetherai.md docs/integrations/chat/togetherai.mdx docs/integrations/chat/openai.md @@ -332,10 
+342,10 @@ docs/integrations/chat/bedrock_converse.md docs/integrations/chat/bedrock_converse.mdx docs/integrations/chat/bedrock.md docs/integrations/chat/bedrock.mdx -docs/integrations/chat/arcjet.md -docs/integrations/chat/arcjet.mdx docs/integrations/chat/azure.md docs/integrations/chat/azure.mdx +docs/integrations/chat/arcjet.md +docs/integrations/chat/arcjet.mdx docs/integrations/chat/anthropic.md docs/integrations/chat/anthropic.mdx docs/integrations/retrievers/self_query/weaviate.md @@ -375,4 +385,4 @@ docs/integrations/document_loaders/file_loaders/pdf.mdx docs/integrations/document_loaders/file_loaders/directory.md docs/integrations/document_loaders/file_loaders/directory.mdx docs/integrations/document_loaders/file_loaders/csv.md -docs/integrations/document_loaders/file_loaders/csv.mdx +docs/integrations/document_loaders/file_loaders/csv.mdx \ No newline at end of file diff --git a/docs/core_docs/docs/how_to/chatbots_memory.ipynb b/docs/core_docs/docs/how_to/chatbots_memory.ipynb index e41b3f1bb465..197a11eab4c5 100644 --- a/docs/core_docs/docs/how_to/chatbots_memory.ipynb +++ b/docs/core_docs/docs/how_to/chatbots_memory.ipynb @@ -1,18 +1,19 @@ { "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_position: 1\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# How to manage memory\n", - "\n", - ":::info Prerequisites\n", - "\n", - "This guide assumes familiarity with the following:\n", - "\n", - "- [Chatbots](/docs/tutorials/chatbot)\n", - "\n", - ":::\n", + "# How to add memory to chatbots\n", "\n", "A key feature of chatbots is their ability to use content of previous conversation turns as context. This state management can take several forms, including:\n", "\n", @@ -20,18 +21,29 @@ "- The above, but trimming old messages to reduce the amount of distracting information the model has to deal with.\n", "- More complex modifications like synthesizing summaries for long running conversations.\n", "\n", - "We’ll go into more detail on a few techniques below!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "We'll go into more detail on a few techniques below!\n", + "\n", + ":::note\n", + "\n", + "This how-to guide previously built a chatbot using [RunnableWithMessageHistory](https://v03.api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html). You can access this version of the tutorial in the [v0.2 docs](https://js.langchain.com/v0.2/docs/how_to/chatbots_memory/).\n", + "\n", + "The LangGraph implementation offers a number of advantages over `RunnableWithMessageHistory`, including the ability to persist arbitrary components of an application's state (instead of only messages).\n", + "\n", + ":::\n", + "\n", "## Setup\n", "\n", - "You’ll need to install a few packages, and set any LLM API keys:\n", + "You'll need to install a few packages, select your chat model, and set its environment variable.\n", + "\n", + "```{=mdx}\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\"\n", + "\n", + "<Npm2Yarn>\n", + "  @langchain/core @langchain/langgraph\n", + "</Npm2Yarn>\n", + "```\n", "\n", - "Let’s also set up a chat model that we’ll use for the below examples:\n", + "Let's set up a chat model that we'll use for the below examples.\n", "\n", "```{=mdx}\n", "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", @@ -46,42 +58,53 @@ "source": [ "## Message passing\n", "\n", - "The simplest form of memory is simply passing chat history messages into a chain. 
Here’s an example:" + "The simplest form of memory is simply passing chat history messages into a chain. Here's an example:" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({ model: \"gpt-4o\" })" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m`I said \"J'adore la programmation,\" which means \"I love programming\" in French.`\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m`I said \"J'adore la programmation,\" which means \"I love programming\" in French.`\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m21\u001b[39m, promptTokens: \u001b[33m61\u001b[39m, totalTokens: \u001b[33m82\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABSxUXVIBitFRBh9MpasB5jeEHfCA\",\n", + " \"content\": \"I said \\\"J'adore la programmation,\\\" which means \\\"I love programming\\\" in French.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 18,\n", + " \"promptTokens\": 58,\n", + " \"totalTokens\": 76\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 58,\n", + " \"output_tokens\": 18,\n", + " \"total_tokens\": 76\n", + " }\n", + "}\n" + ] } ], "source": [ @@ -119,303 +142,191 @@ "We can see that by passing the previous conversation into a chain, it can use it as context to answer questions. This is the basic concept underpinning chatbot memory - the rest of the guide will demonstrate convenient techniques for passing or reformatting messages." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Chat history\n", - "\n", - "It’s perfectly fine to store and pass messages directly as an array, but we can use LangChain’s built-in message history class to store and load messages as well. Instances of this class are responsible for storing and loading chat messages from persistent storage. 
LangChain integrates with many providers but for this demo we will use an ephemeral demo class.\n", - "\n", - "Here’s an example of the API:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Translate this sentence from English to French: I love programming.\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Translate this sentence from English to French: I love programming.\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"J'adore la programmation.\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"J'adore la programmation.\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {},\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " }\n", - "]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import { ChatMessageHistory } from \"langchain/stores/message/in_memory\";\n", - "\n", - "const demoEphemeralChatMessageHistory = new ChatMessageHistory();\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(\n", - " new HumanMessage(\n", - " \"Translate this sentence from English to French: I love programming.\"\n", - " )\n", - ");\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(\n", - " new AIMessage(\"J'adore la programmation.\")\n", - ");\n", - "\n", - "await demoEphemeralChatMessageHistory.getMessages();" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can use it directly to store conversation turns for our chain:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'You just asked me to translate the sentence \"I love programming\" from English to French.'\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'You just asked me to translate the sentence \"I love programming\" from English to French.'\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m18\u001b[39m, promptTokens: \u001b[33m73\u001b[39m, totalTokens: \u001b[33m91\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " 
invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await demoEphemeralChatMessageHistory.clear();\n", - "\n", - "const input1 =\n", - " \"Translate this sentence from English to French: I love programming.\";\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(new HumanMessage(input1));\n", - "\n", - "const response = await chain.invoke({\n", - " messages: await demoEphemeralChatMessageHistory.getMessages(),\n", - "});\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(response);\n", - "\n", - "const input2 = \"What did I just ask you?\";\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(new HumanMessage(input2));\n", - "\n", - "await chain.invoke({\n", - " messages: await demoEphemeralChatMessageHistory.getMessages(),\n", - "});" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Automatic history management\n", "\n", - "The previous examples pass messages to the chain explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also includes an wrapper for LCEL chains that can handle this process automatically called `RunnableWithMessageHistory`.\n", - "\n", - "To show how it works, let’s slightly modify the above prompt to take a final `input` variable that populates a `HumanMessage` template after the chat history. This means that we will expect a `chat_history` parameter that contains all messages BEFORE the current messages instead of all messages:" + "The previous examples pass messages to the chain (and model) explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also provides a way to build applications that have memory using LangGraph's persistence. You can enable persistence in LangGraph applications by providing a `checkpointer` when compiling the graph." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ - "const runnableWithMessageHistoryPrompt = ChatPromptTemplate.fromMessages([\n", - " [\n", - " \"system\",\n", - " \"You are a helpful assistant. Answer all questions to the best of your ability.\",\n", - " ],\n", - " new MessagesPlaceholder(\"chat_history\"),\n", - " [\"human\", \"{input}\"],\n", - "]);\n", + "import { START, END, MessagesAnnotation, StateGraph, MemorySaver } from \"@langchain/langgraph\";\n", "\n", - "const chain2 = runnableWithMessageHistoryPrompt.pipe(llm);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We’ll pass the latest input to the conversation here and let the `RunnableWithMessageHistory` class wrap our chain and do the work of appending that `input` variable to the chat history.\n", "\n", - "Next, let’s declare our wrapped chain:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "import { RunnableWithMessageHistory } from \"@langchain/core/runnables\";\n", + "// Define the function that calls the model\n", + "const callModel = async (state: typeof MessagesAnnotation.State) => {\n", + " const systemPrompt = \n", + " \"You are a helpful assistant. 
\" +\n", + " \"Answer all questions to the best of your ability.\";\n", + " const messages = [{ role: \"system\", content: systemPrompt }, ...state.messages];\n", + " const response = await llm.invoke(messages);\n", + " return { messages: response };\n", + "};\n", "\n", - "const demoEphemeralChatMessageHistoryForChain = new ChatMessageHistory();\n", + "const workflow = new StateGraph(MessagesAnnotation)\n", + "// Define the node and edge\n", + " .addNode(\"model\", callModel)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", "\n", - "const chainWithMessageHistory = new RunnableWithMessageHistory({\n", - " runnable: chain2,\n", - " getMessageHistory: (_sessionId) => demoEphemeralChatMessageHistoryForChain,\n", - " inputMessagesKey: \"input\",\n", - " historyMessagesKey: \"chat_history\",\n", - "});" + "// Add simple in-memory checkpointer\n", + "// highlight-start\n", + "const memory = new MemorySaver();\n", + "const app = workflow.compile({ checkpointer: memory });\n", + "// highlight-end" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This class takes a few parameters in addition to the chain that we want to wrap:\n", - "\n", - "- A factory function that returns a message history for a given session id. This allows your chain to handle multiple users at once by loading different messages for different conversations.\n", - "- An `inputMessagesKey` that specifies which part of the input should be tracked and stored in the chat history. In this example, we want to track the string passed in as input.\n", - "- A `historyMessagesKey` that specifies what the previous messages should be injected into the prompt as. Our prompt has a `MessagesPlaceholder` named `chat_history`, so we specify this property to match.\n", - " (For chains with multiple outputs) an `outputMessagesKey` which specifies which output to store as history. This is the inverse of `inputMessagesKey`.\n", - "\n", - "We can invoke this new chain as normal, with an additional `configurable` field that specifies the particular `sessionId` to pass to the factory function. 
This is unused for the demo, but in real-world chains, you’ll want to return a chat history corresponding to the passed session:" + " We'll pass the latest input to the conversation here and let the LangGraph keep track of the conversation history using the checkpointer:" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 25, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m`The translation of \"I love programming\" in French is \"J'adore la programmation.\"`\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m`The translation of \"I love programming\" in French is \"J'adore la programmation.\"`\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m20\u001b[39m, promptTokens: \u001b[33m39\u001b[39m, totalTokens: \u001b[33m59\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"227b82a9-4084-46a5-ac79-ab9a3faa140e\",\n", + " \"content\": \"Translate to French: I love programming.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxVrvztgnasTeMSFbpZQmyYqjJZ\",\n", + " \"content\": \"J'adore la programmation.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 5,\n", + " \"promptTokens\": 35,\n", + " \"totalTokens\": 40\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 35,\n", + " \"output_tokens\": 5,\n", + " \"total_tokens\": 40\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "await chainWithMessageHistory.invoke(\n", + "await app.invoke(\n", " {\n", - " input:\n", - " \"Translate this sentence from English to French: I love programming.\",\n", + " messages: [\n", + " {\n", + " role: \"user\",\n", + " content: \"Translate to French: I love programming.\"\n", + " }\n", + " ]\n", " },\n", - " { configurable: { sessionId: \"unused\" } }\n", + " {\n", + " configurable: { thread_id: \"1\" }\n", + " }\n", ");" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 26, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'You just asked for the translation of the sentence \"I love programming\" from English to French.'\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: 
\u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'You just asked for the translation of the sentence \"I love programming\" from English to French.'\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m19\u001b[39m, promptTokens: \u001b[33m74\u001b[39m, totalTokens: \u001b[33m93\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"1a0560a4-9dcb-47a1-b441-80717e229706\",\n", + " \"content\": \"Translate to French: I love programming.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxVrvztgnasTeMSFbpZQmyYqjJZ\",\n", + " \"content\": \"J'adore la programmation.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 5,\n", + " \"promptTokens\": 35,\n", + " \"totalTokens\": 40\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"4f233a7d-4b08-4f53-bb60-cf0141a59721\",\n", + " \"content\": \"What did I just ask you?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxVs5QnlPfbihTOmJrCVg1Dh7Ol\",\n", + " \"content\": \"You asked me to translate \\\"I love programming\\\" into French.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 13,\n", + " \"promptTokens\": 55,\n", + " \"totalTokens\": 68\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_9f2bfdaa89\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 55,\n", + " \"output_tokens\": 13,\n", + " \"total_tokens\": 68\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "await chainWithMessageHistory.invoke(\n", + "await app.invoke(\n", " {\n", - " input: \"What did I just ask you?\",\n", + " messages: [\n", + " {\n", + " role: \"user\",\n", + " content: \"What did I just ask you?\"\n", + " }\n", + " ]\n", " },\n", - " { configurable: { sessionId: \"unused\" } }\n", + " {\n", + " configurable: { thread_id: \"1\" }\n", + " }\n", ");" ] }, @@ -429,159 +340,98 @@ "\n", "### Trimming messages\n", "\n", - "LLMs and chat models have limited context windows, and even if you’re not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is to only load and store the most recent `n` messages. 
Let’s use an example history with some preloaded messages:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Hey there! I'm Nemo.\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Hey there! I'm Nemo.\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Hello!\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Hello!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {},\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " },\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"How are you today?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"How are you today?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Fine thanks!\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Fine thanks!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {},\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " }\n", - "]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await demoEphemeralChatMessageHistory.clear();\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(\n", - " new HumanMessage(\"Hey there! I'm Nemo.\")\n", - ");\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(new AIMessage(\"Hello!\"));\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(\n", - " new HumanMessage(\"How are you today?\")\n", - ");\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(new AIMessage(\"Fine thanks!\"));\n", - "\n", - "await demoEphemeralChatMessageHistory.getMessages();" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let’s use this message history with the `RunnableWithMessageHistory` chain we declared above:" + "LLMs and chat models have limited context windows, and even if you're not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is trim the history messages before passing them to the model. 
Let's use an example history with the `app` we declared above:" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 27, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Your name is Nemo!\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Your name is Nemo!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m6\u001b[39m, promptTokens: \u001b[33m66\u001b[39m, totalTokens: \u001b[33m72\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"63057c3d-f980-4640-97d6-497a9f83ddee\",\n", + " \"content\": \"Hey there! I'm Nemo.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"c9f0c20a-8f55-4909-b281-88f2a45c4f05\",\n", + " \"content\": \"Hello!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"fd7fb3a0-7bc7-4e84-99a9-731b30637b55\",\n", + " \"content\": \"How are you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"09b0debb-1d4a-4856-8821-b037f5d96ecf\",\n", + " \"content\": \"Fine thanks!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"edc13b69-25a0-40ac-81b3-175e65dc1a9a\",\n", + " \"content\": \"What's my name?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxWKCTdRuh2ZifXsvFHSo5z5I0J\",\n", + " \"content\": \"Your name is Nemo! How can I assist you today, Nemo?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 14,\n", + " \"promptTokens\": 63,\n", + " \"totalTokens\": 77\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_a5d11b2ef2\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 63,\n", + " \"output_tokens\": 14,\n", + " \"total_tokens\": 77\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "const chainWithMessageHistory2 = new RunnableWithMessageHistory({\n", - " runnable: chain2,\n", - " getMessageHistory: (_sessionId) => demoEphemeralChatMessageHistory,\n", - " inputMessagesKey: \"input\",\n", - " historyMessagesKey: \"chat_history\",\n", - "});\n", + "const demoEphemeralChatHistory = [\n", + " { role: \"user\", content: \"Hey there! 
I'm Nemo.\" },\n", + " { role: \"assistant\", content: \"Hello!\" },\n", + " { role: \"user\", content: \"How are you today?\" },\n", + " { role: \"assistant\", content: \"Fine thanks!\" },\n", + "];\n", "\n", - "await chainWithMessageHistory2.invoke(\n", + "await app.invoke(\n", " {\n", - " input: \"What's my name?\",\n", + " messages: [\n", + " ...demoEphemeralChatHistory,\n", + " { role: \"user\", content: \"What's my name?\" }\n", + " ]\n", " },\n", - " { configurable: { sessionId: \"unused\" } }\n", + " {\n", + " configurable: { thread_id: \"2\" }\n", + " }\n", ");" ] }, @@ -589,551 +439,324 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can see the chain remembers the preloaded name.\n", + "We can see the app remembers the preloaded name.\n", "\n", - "But let’s say we have a very small context window, and we want to trim the number of messages passed to the chain to only the 2 most recent ones. We can use the `clear` method to remove messages and re-add them to the history. We don’t have to, but let’s put this method at the front of our chain to ensure it’s always called:" + "But let's say we have a very small context window, and we want to trim the number of messages passed to the model to only the 2 most recent ones. We can use the built in [trimMessages](/docs/how_to/trim_messages/) util to trim messages based on their token count before they reach our prompt. In this case we'll count each message as 1 \"token\" and keep only the last two messages:" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ - "import {\n", - " RunnablePassthrough,\n", - " RunnableSequence,\n", - "} from \"@langchain/core/runnables\";\n", - "\n", - "const trimMessages = async (_chainInput: Record) => {\n", - " const storedMessages = await demoEphemeralChatMessageHistory.getMessages();\n", - " if (storedMessages.length <= 2) {\n", - " return false;\n", - " }\n", - " await demoEphemeralChatMessageHistory.clear();\n", - " for (const message of storedMessages.slice(-2)) {\n", - " demoEphemeralChatMessageHistory.addMessage(message);\n", - " }\n", - " return true;\n", + "import { START, END, MessagesAnnotation, StateGraph, MemorySaver } from \"@langchain/langgraph\";\n", + "import { trimMessages } from \"@langchain/core/messages\";\n", + "\n", + "// Define trimmer\n", + "// highlight-start\n", + "// count each message as 1 \"token\" (tokenCounter: (msgs) => msgs.length) and keep only the last two messages\n", + "const trimmer = trimMessages({ strategy: \"last\", maxTokens: 2, tokenCounter: (msgs) => msgs.length });\n", + "// highlight-end\n", + "\n", + "// Define the function that calls the model\n", + "const callModel2 = async (state: typeof MessagesAnnotation.State) => {\n", + " // highlight-start\n", + " const trimmedMessages = await trimmer.invoke(state.messages);\n", + " const systemPrompt = \n", + " \"You are a helpful assistant. 
\" +\n", + " \"Answer all questions to the best of your ability.\";\n", + " const messages = [{ role: \"system\", content: systemPrompt }, ...trimmedMessages];\n", + " // highlight-end\n", + " const response = await llm.invoke(messages);\n", + " return { messages: response };\n", "};\n", "\n", - "const chainWithTrimming = RunnableSequence.from([\n", - " RunnablePassthrough.assign({ messages_trimmed: trimMessages }),\n", - " chainWithMessageHistory2,\n", - "]);" + "const workflow2 = new StateGraph(MessagesAnnotation)\n", + " // Define the node and edge\n", + " .addNode(\"model\", callModel2)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "// Add simple in-memory checkpointer\n", + "const app2 = workflow2.compile({ checkpointer: new MemorySaver() });" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let’s call this new chain and check the messages afterwards:" + "Let's call this new app and check the response" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 29, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'P. Sherman is a fictional character who lives at 42 Wallaby Way, Sydney, from the movie \"Finding Nem'\u001b[39m... 3 more characters,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'P. Sherman is a fictional character who lives at 42 Wallaby Way, Sydney, from the movie \"Finding Nem'\u001b[39m... 3 more characters,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m26\u001b[39m, promptTokens: \u001b[33m53\u001b[39m, totalTokens: \u001b[33m79\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"0d9330a0-d9d1-4aaf-8171-ca1ac6344f7c\",\n", + " \"content\": \"What is my name?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"3a24e88b-7525-4797-9fcd-d751a378d22c\",\n", + " \"content\": \"Fine thanks!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"276039c8-eba8-4c68-b015-81ec7704140d\",\n", + " \"content\": \"How are you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"2ad4f461-20e1-4982-ba3b-235cb6b02abd\",\n", + " \"content\": \"Hello!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"52213cae-953a-463d-a4a0-a7368c9ee4db\",\n", + " \"content\": \"Hey there! 
I'm Nemo.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxWe9BRDl1pmzkNIDawWwU3hvKm\",\n", + " \"content\": \"I'm sorry, but I don't have access to personal information about you unless you've shared it with me during our conversation. How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 30,\n", + " \"promptTokens\": 39,\n", + " \"totalTokens\": 69\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_3537616b13\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 39,\n", + " \"output_tokens\": 30,\n", + " \"total_tokens\": 69\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "await chainWithTrimming.invoke(\n", + "await app2.invoke(\n", " {\n", - " input: \"Where does P. Sherman live?\",\n", + " messages: [\n", + " ...demoEphemeralChatHistory,\n", + " { role: \"user\", content: \"What is my name?\" }\n", + " ]\n", " },\n", - " { configurable: { sessionId: \"unused\" } }\n", - ");" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"What's my name?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"What's my name?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Your name is Nemo!\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Your name is Nemo!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m6\u001b[39m, promptTokens: \u001b[33m66\u001b[39m, totalTokens: \u001b[33m72\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " },\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Where does P. Sherman live?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Where does P. Sherman live?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'P. 
Sherman is a fictional character who lives at 42 Wallaby Way, Sydney, from the movie \"Finding Nem'\u001b[39m... 3 more characters,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'P. Sherman is a fictional character who lives at 42 Wallaby Way, Sydney, from the movie \"Finding Nem'\u001b[39m... 3 more characters,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m26\u001b[39m, promptTokens: \u001b[33m53\u001b[39m, totalTokens: \u001b[33m79\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " }\n", - "]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await demoEphemeralChatMessageHistory.getMessages();" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And we can see that our history has removed the two oldest messages while still adding the most recent conversation at the end. The next time the chain is called, `trimMessages` will be called again, and only the two most recent messages will be passed to the model. In this case, this means that the model will forget the name we gave it the next time we invoke it:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"I'm sorry, I don't have access to your personal information. Can I help you with anything else?\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"I'm sorry, I don't have access to your personal information. Can I help you with anything else?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m22\u001b[39m, promptTokens: \u001b[33m73\u001b[39m, totalTokens: \u001b[33m95\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await chainWithTrimming.invoke(\n", " {\n", - " input: \"What is my name?\",\n", - " },\n", - " { configurable: { sessionId: \"unused\" } }\n", + " configurable: { thread_id: \"3\" }\n", + " }\n", ");" ] }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Where does P. 
Sherman live?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Where does P. Sherman live?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'P. Sherman is a fictional character who lives at 42 Wallaby Way, Sydney, from the movie \"Finding Nem'\u001b[39m... 3 more characters,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'P. Sherman is a fictional character who lives at 42 Wallaby Way, Sydney, from the movie \"Finding Nem'\u001b[39m... 3 more characters,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m26\u001b[39m, promptTokens: \u001b[33m53\u001b[39m, totalTokens: \u001b[33m79\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " },\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"What is my name?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"What is my name?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"I'm sorry, I don't have access to your personal information. Can I help you with anything else?\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"I'm sorry, I don't have access to your personal information. 
Can I help you with anything else?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m22\u001b[39m, promptTokens: \u001b[33m73\u001b[39m, totalTokens: \u001b[33m95\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " }\n", - "]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await demoEphemeralChatMessageHistory.getMessages();" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Summary memory\n", + "We can see that `trimMessages` was called and only the two most recent messages will be passed to the model. In this case, this means that the model forgot the name we gave it.\n", "\n", - "We can use this same pattern in other ways too. For example, we could use an additional LLM call to generate a summary of the conversation before calling our chain. Let’s recreate our chat history and chatbot chain:" + "Check out our [how to guide on trimming messages](/docs/how_to/trim_messages/) for more." ] }, { - "cell_type": "code", - "execution_count": 17, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "await demoEphemeralChatMessageHistory.clear();\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(\n", - " new HumanMessage(\"Hey there! I'm Nemo.\")\n", - ");\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(new AIMessage(\"Hello!\"));\n", - "\n", - "await demoEphemeralChatMessageHistory.addMessage(\n", - " new HumanMessage(\"How are you today?\")\n", - ");\n", + "### Summary memory\n", "\n", - "await demoEphemeralChatMessageHistory.addMessage(new AIMessage(\"Fine thanks!\"));" + "We can use this same pattern in other ways too. For example, we could use an additional LLM call to generate a summary of the conversation before calling our app. Let's recreate our chat history:" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ - "const runnableWithSummaryMemoryPrompt = ChatPromptTemplate.fromMessages([\n", - " [\n", - " \"system\",\n", - " \"You are a helpful assistant. Answer all questions to the best of your ability. The provided chat history includes facts about the user you are speaking with.\",\n", - " ],\n", - " new MessagesPlaceholder(\"chat_history\"),\n", - " [\"human\", \"{input}\"],\n", - "]);\n", - "\n", - "const summaryMemoryChain = runnableWithSummaryMemoryPrompt.pipe(llm);\n", - "\n", - "const chainWithMessageHistory3 = new RunnableWithMessageHistory({\n", - " runnable: summaryMemoryChain,\n", - " getMessageHistory: (_sessionId) => demoEphemeralChatMessageHistory,\n", - " inputMessagesKey: \"input\",\n", - " historyMessagesKey: \"chat_history\",\n", - "});" + "const demoEphemeralChatHistory2 = [\n", + " { role: \"user\", content: \"Hey there! I'm Nemo.\" },\n", + " { role: \"assistant\", content: \"Hello!\" },\n", + " { role: \"user\", content: \"How are you today?\" },\n", + " { role: \"assistant\", content: \"Fine thanks!\" },\n", + "];" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "And now, let’s create a function that will distill previous interactions into a summary. 
We can add this one to the front of the chain too:" + "And now, let's update the model-calling function to distill previous interactions into a summary:" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "const summarizeMessages = async (_chainInput: Record) => {\n", - " const storedMessages = await demoEphemeralChatMessageHistory.getMessages();\n", - " if (storedMessages.length === 0) {\n", - " return false;\n", + "import { START, END, MessagesAnnotation, StateGraph, MemorySaver } from \"@langchain/langgraph\";\n", + "import { RemoveMessage } from \"@langchain/core/messages\";\n", + "\n", + "\n", + "// Define the function that calls the model\n", + "const callModel3 = async (state: typeof MessagesAnnotation.State) => {\n", + " const systemPrompt = \n", + " \"You are a helpful assistant. \" +\n", + " \"Answer all questions to the best of your ability. \" +\n", + " \"The provided chat history includes a summary of the earlier conversation.\";\n", + " const systemMessage = { role: \"system\", content: systemPrompt };\n", + " const messageHistory = state.messages.slice(0, -1); // exclude the most recent user input\n", + " \n", + " // Summarize the messages if the chat history reaches a certain size\n", + " if (messageHistory.length >= 4) {\n", + " const lastHumanMessage = state.messages[state.messages.length - 1];\n", + " // Invoke the model to generate conversation summary\n", + " const summaryPrompt = \n", + " \"Distill the above chat messages into a single summary message. \" +\n", + " \"Include as many specific details as you can.\";\n", + " const summaryMessage = await llm.invoke([\n", + " ...messageHistory,\n", + " { role: \"user\", content: summaryPrompt }\n", + " ]);\n", + "\n", + " // Delete messages that we no longer want to show up\n", + " const deleteMessages = state.messages.map(m => new RemoveMessage({ id: m.id }));\n", + " // Re-add user message\n", + " const humanMessage = { role: \"user\", content: lastHumanMessage.content };\n", + " // Call the model with summary & response\n", + " const response = await llm.invoke([systemMessage, summaryMessage, humanMessage]);\n", + " return { messages: [summaryMessage, humanMessage, response, ...deleteMessages] };\n", + " } else {\n", + " const response = await llm.invoke([systemMessage, ...state.messages]);\n", + " return { messages: response };\n", " }\n", - " const summarizationPrompt = ChatPromptTemplate.fromMessages([\n", - " new MessagesPlaceholder(\"chat_history\"),\n", - " [\n", - " \"user\",\n", - " \"Distill the above chat messages into a single summary message. 
Include as many specific details as you can.\",\n", - " ],\n", - " ]);\n", - " const summarizationChain = summarizationPrompt.pipe(llm);\n", - " const summaryMessage = await summarizationChain.invoke({\n", - " chat_history: storedMessages,\n", - " });\n", - " await demoEphemeralChatMessageHistory.clear();\n", - " demoEphemeralChatMessageHistory.addMessage(summaryMessage);\n", - " return true;\n", "};\n", "\n", - "const chainWithSummarization = RunnableSequence.from([\n", - " RunnablePassthrough.assign({\n", - " messages_summarized: summarizeMessages,\n", - " }),\n", - " chainWithMessageHistory3,\n", - "]);" + "const workflow3 = new StateGraph(MessagesAnnotation)\n", + " // Define the node and edge\n", + " .addNode(\"model\", callModel3)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "// Add simple in-memory checkpointer\n", + "const app3 = workflow3.compile({ checkpointer: new MemorySaver() });" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let’s see if it remembers the name we gave it:" + "Let's see if it remembers the name we gave it:" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 32, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'You introduced yourself as \"Nemo.\"'\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'You introduced yourself as \"Nemo.\"'\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m8\u001b[39m, promptTokens: \u001b[33m87\u001b[39m, totalTokens: \u001b[33m95\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxXjFDj6WRo7VLSneBtlAxUumPE\",\n", + " \"content\": \"Nemo greeted the assistant and asked how it was doing, to which the assistant responded that it was fine.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 22,\n", + " \"promptTokens\": 60,\n", + " \"totalTokens\": 82\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 60,\n", + " \"output_tokens\": 22,\n", + " \"total_tokens\": 82\n", + " }\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"8b1309b7-c09e-47fb-9ab3-34047f6973e3\",\n", + " \"content\": \"What did I say my name was?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABSxYAQKiBsQ6oVypO4CLFDsi1HRH\",\n", + " \"content\": \"You mentioned that your name is Nemo.\",\n", + " \"additional_kwargs\": {},\n", + " 
\"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 73,\n", + " \"totalTokens\": 81\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 73,\n", + " \"output_tokens\": 8,\n", + " \"total_tokens\": 81\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "await chainWithSummarization.invoke(\n", + "await app3.invoke(\n", " {\n", - " input: \"What did I say my name was?\",\n", + " messages: [\n", + " ...demoEphemeralChatHistory2,\n", + " { role: \"user\", content: \"What did I say my name was?\" }\n", + " ]\n", " },\n", " {\n", - " configurable: { sessionId: \"unused\" },\n", + " configurable: { thread_id: \"4\" }\n", " }\n", ");" ] }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"The conversation consists of a greeting from someone named Nemo and a general inquiry about their we\"\u001b[39m... 86 more characters,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"The conversation consists of a greeting from someone named Nemo and a general inquiry about their we\"\u001b[39m... 86 more characters,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m34\u001b[39m, promptTokens: \u001b[33m62\u001b[39m, totalTokens: \u001b[33m96\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " },\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"What did I say my name was?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"What did I say my name was?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m'You introduced yourself as \"Nemo.\"'\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m'You introduced yourself as \"Nemo.\"'\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: 
\u001b[33m8\u001b[39m, promptTokens: \u001b[33m87\u001b[39m, totalTokens: \u001b[33m95\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - " }\n", - "]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await demoEphemeralChatMessageHistory.getMessages();" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note that invoking the chain again will generate another summary generated from the initial summary plus new messages and so on. You could also design a hybrid approach where a certain number of messages are retained in chat history while others are summarized.\n", - "\n", - "## Next steps\n", - "\n", - "You've now learned how to manage memory in your chatbots\n", - "\n", - "Next, check out some of the other guides in this section, such as [how to add retrieval to your chatbot](/docs/how_to/chatbots_retrieval)." + "Note that invoking the app again will keep accumulating the history until it reaches the specified number of messages (four in our case). At that point we will generate another summary generated from the initial summary plus new messages and so on." ] } ], @@ -1144,14 +767,17 @@ "name": "deno" }, "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, "file_extension": ".ts", - "mimetype": "text/x.typescript", + "mimetype": "text/typescript", "name": "typescript", - "nb_converter": "script", - "pygments_lexer": "typescript", - "version": "5.3.3" + "version": "3.7.2" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docs/core_docs/docs/how_to/chatbots_retrieval.ipynb b/docs/core_docs/docs/how_to/chatbots_retrieval.ipynb index a3af18fed5b4..eed68bdb0bc3 100644 --- a/docs/core_docs/docs/how_to/chatbots_retrieval.ipynb +++ b/docs/core_docs/docs/how_to/chatbots_retrieval.ipynb @@ -45,6 +45,7 @@ "outputs": [], "source": [ "// @lc-docs-hide-cell\n", + "\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", "const llm = new ChatOpenAI({\n", diff --git a/docs/core_docs/docs/how_to/chatbots_tools.ipynb b/docs/core_docs/docs/how_to/chatbots_tools.ipynb index b5a3b2d0cdd1..d9f8ff25e52f 100644 --- a/docs/core_docs/docs/how_to/chatbots_tools.ipynb +++ b/docs/core_docs/docs/how_to/chatbots_tools.ipynb @@ -4,67 +4,107 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# How to use tools\n", + "# How to add tools to chatbots\n", "\n", ":::info Prerequisites\n", "\n", "This guide assumes familiarity with the following concepts:\n", "\n", "- [Chatbots](/docs/concepts/#messages)\n", - "- [Agents](https://langchain-ai.github.io/langgraphjs/tutorials/quickstart/)\n", + "- [Agents](https://langchain-ai.github.io/langgraphjs/tutorials/multi_agent/agent_supervisor/)\n", "- [Chat history](/docs/concepts/#chat-history)\n", "\n", ":::\n", "\n", "This section will cover how to create conversational agents: chatbots that can interact with other systems and APIs using tools.\n", "\n", - "## Setup\n", + ":::note\n", + "\n", + "This how-to guide previously built a chatbot using [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html). 
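Returning for a moment to the summarization example above: the accumulation behavior it describes can be sketched as below (assuming `app3` and thread `"4"` from that example; the question text is only illustrative). Only the new user message needs to be passed in — the checkpointer supplies the prior history, and once at least four earlier messages have accumulated, the node folds them into a fresh summary before answering:

```typescript
const followUp = await app3.invoke(
  {
    messages: [{ role: "user", content: "What was the first thing I asked you?" }],
  },
  { configurable: { thread_id: "4" } }
);

console.log(followUp.messages[followUp.messages.length - 1].content);
```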
You can access this version of the tutorial in the [v0.2 docs](https://js.langchain.com/v0.2/docs/how_to/chatbots_tools/).\n", + "\n", + "The LangGraph implementation offers a number of advantages over `RunnableWithMessageHistory`, including the ability to persist arbitrary components of an application's state (instead of only messages).\n", + "\n", + ":::\n", "\n", - "For this guide, we’ll be using an [tool calling agent](/docs/how_to/agent_executor) with a single tool for searching the web. The default will be powered by [Tavily](/docs/integrations/tools/tavily_search), but you can switch it out for any similar tool. The rest of this section will assume you’re using Tavily.\n", + "## Setup\n", "\n", - "You’ll need to [sign up for an account on the Tavily website](https://tavily.com), and install the following packages:\n", + "For this guide, we'll be using a [tool calling agent](https://langchain-ai.github.io/langgraphjs/concepts/agentic_concepts/#tool-calling-agent) with a single tool for searching the web. The default will be powered by [Tavily](/docs/integrations/tools/tavily_search), but you can switch it out for any similar tool. The rest of this section will assume you're using Tavily.\n", "\n", + "You'll need to [sign up for an account](https://tavily.com/) on the Tavily website, and install the following packages:\n", "\n", "```{=mdx}\n", "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", "\n", "\n", - " @langchain/openai langchain @langchain/core\n", + " @langchain/core @langchain/langgraph @langchain/community\n", "\n", + "```\n", + "\n", + "Let’s also set up a chat model that we’ll use for the below examples.\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```\n", + "\n", + "```typescript\n", + "process.env.TAVILY_API_KEY = \"YOUR_API_KEY\";\n", "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating an agent\n", + "\n", + "Our end goal is to create an agent that can respond conversationally to user questions while looking up information as needed.\n", + "\n", + "First, let's initialize Tavily and an OpenAI chat model capable of tool calling:" + ] + }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import { TavilySearchResults } from \"@langchain/community/tools/tavily_search\";\n", + "// @lc-docs-hide-cell\n", + "\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import { TavilySearchResults } from \"@langchain/community/tools/tavily_search\";\n", + "\n", "const tools = [\n", " new TavilySearchResults({\n", " maxResults: 1,\n", " }),\n", - "];\n", - "\n", - "const llm = new ChatOpenAI({\n", - " model: \"gpt-3.5-turbo-1106\",\n", - " temperature: 0,\n", - "});" + "];" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "To make our agent conversational, we must also choose a prompt with a placeholder for our chat history. Here’s an example:\n" + "To make our agent conversational, we can also specify a prompt. Here's an example:" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -78,8 +118,6 @@ " \"system\",\n", " \"You are a helpful assistant. 
You may not need to use tools for every query - the user may just want to chat!\",\n", " ],\n", - " [\"placeholder\", \"{messages}\"],\n", - " [\"placeholder\", \"{agent_scratchpad}\"],\n", "]);" ] }, @@ -87,30 +125,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Great! Now let’s assemble our agent:\n", - "\n", - "```{=mdx}\n", - ":::tip\n", - "As of `langchain` version `0.2.8`, the `createOpenAIToolsAgent` function now supports [OpenAI-formatted tools](https://api.js.langchain.com/interfaces/langchain_core.language_models_base.ToolDefinition.html).\n", - ":::\n", - "```\n" + "Great! Now let's assemble our agent using LangGraph's prebuilt [createReactAgent](https://langchain-ai.github.io/langgraphjs/reference/functions/langgraph_prebuilt.createReactAgent.html), which allows you to create a [tool-calling agent](https://langchain-ai.github.io/langgraphjs/concepts/agentic_concepts/#tool-calling-agent):" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "import { AgentExecutor, createToolCallingAgent } from \"langchain/agents\";\n", - "\n", - "const agent = await createToolCallingAgent({\n", - " llm,\n", - " tools,\n", - " prompt,\n", - "});\n", + "import { createReactAgent } from \"@langchain/langgraph/prebuilt\"\n", "\n", - "const agentExecutor = new AgentExecutor({ agent, tools });" + "// messageModifier allows you to preprocess the inputs to the model inside ReAct agent\n", + "// in this case, since we're passing a prompt string, we'll just always add a SystemMessage\n", + "// with this prompt string before any other messages sent to the model\n", + "const agent = createReactAgent({ llm, tools, messageModifier: prompt })" ] }, { @@ -119,98 +148,108 @@ "source": [ "## Running the agent\n", "\n", - "Now that we’ve set up our agent, let’s try interacting with it! It can handle both trivial queries that require no lookup:\n" + "Now that we've set up our agent, let's try interacting with it! It can handle both trivial queries that require no lookup:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{\n", - " messages: [\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " }\n", - " ],\n", - " output: \u001b[32m\"Hi Nemo! It's great to meet you. 
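Since any tool with a similar interface can stand in for Tavily, here is a sketch of a custom tool built with the `tool` helper from `@langchain/core/tools` and `zod`. The tool name and its canned response are hypothetical — the point is only that such a tool can be dropped into the same `tools` array passed to `createReactAgent`:

```typescript
import { tool } from "@langchain/core/tools";
import { z } from "zod";

// A stand-in "search" tool with a hard-coded answer, for demonstration only.
const offlineSearchTool = tool(
  async ({ query }) => {
    return `No live results are available for "${query}" in this offline demo.`;
  },
  {
    name: "offline_search",
    description: "Look up information about a query (demo stub).",
    schema: z.object({
      query: z.string().describe("The search query"),
    }),
  }
);

// Usable in place of (or alongside) the Tavily tool:
// const agent = createReactAgent({ llm, tools: [offlineSearchTool], messageModifier: prompt });
```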
How can I assist you today?\"\u001b[39m\n", - "}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"8c5fa465-e8d8-472a-9434-f574bf74537f\",\n", + " \"content\": \"I'm Nemo!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTKLLriRcZin65zLAMB3WUf9Sg1t\",\n", + " \"content\": \"How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 93,\n", + " \"totalTokens\": 101\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_3537616b13\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 93,\n", + " \"output_tokens\": 8,\n", + " \"total_tokens\": 101\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "import { HumanMessage } from \"@langchain/core/messages\";\n", - "\n", - "await agentExecutor.invoke({\n", - " messages: [new HumanMessage(\"I'm Nemo!\")],\n", - "});" + "await agent.invoke({ messages: [{ role: \"user\", content: \"I'm Nemo!\" }]})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Or, it can use of the passed search tool to get up to date information if needed:\n" + "Or, it can use of the passed search tool to get up to date information if needed:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{\n", - " messages: [\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"What is the current conservation status of the Great Barrier Reef?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"What is the current conservation status of the Great Barrier Reef?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " }\n", - " ],\n", - " output: \u001b[32m\"The Great Barrier Reef has recorded its highest amount of coral cover since the Australian Institute\"\u001b[39m... 
688 more characters\n", - "}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"65c315b6-2433-4cb1-97c7-b60b5546f518\",\n", + " \"content\": \"What is the current conservation status of the Great Barrier Reef?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTKLQn1e4axRhqIhpKMyzWWTGauO\",\n", + " \"content\": \"How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 93,\n", + " \"totalTokens\": 101\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_3537616b13\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 93,\n", + " \"output_tokens\": 8,\n", + " \"total_tokens\": 101\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "await agentExecutor.invoke({\n", - " messages: [\n", - " new HumanMessage(\n", - " \"What is the current conservation status of the Great Barrier Reef?\"\n", - " ),\n", - " ],\n", - "});" + "await agent.invoke({ messages: [{ role: \"user\", content: \"What is the current conservation status of the Great Barrier Reef?\" }]})" ] }, { @@ -219,246 +258,233 @@ "source": [ "## Conversational responses\n", "\n", - "Because our prompt contains a placeholder for chat history messages, our agent can also take previous interactions into account and respond conversationally like a standard chatbot:\n" + "Because our prompt contains a placeholder for chat history messages, our agent can also take previous interactions into account and respond conversationally like a standard chatbot:" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{\n", - " messages: [\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Hello Nemo! How can I assist you today?\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Hello Nemo! 
How can I assist you today?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {},\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " usage_metadata: \u001b[90mundefined\u001b[39m\n", - " },\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"What is my name?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"What is my name?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " }\n", - " ],\n", - " output: \u001b[32m\"Your name is Nemo!\"\u001b[39m\n", - "}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"6433afc5-31bd-44b3-b34c-f11647e1677d\",\n", + " \"content\": \"I'm Nemo!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"f163b5f1-ea29-4d7a-9965-7c7c563d9cea\",\n", + " \"content\": \"Hello Nemo! How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"382c3354-d02b-4888-98d8-44d75d045044\",\n", + " \"content\": \"What is my name?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTKMKu7ThZDZW09yMIPTq2N723Cj\",\n", + " \"content\": \"How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 93,\n", + " \"totalTokens\": 101\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 93,\n", + " \"output_tokens\": 8,\n", + " \"total_tokens\": 101\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "import { AIMessage } from \"@langchain/core/messages\";\n", - "\n", - "await agentExecutor.invoke({\n", + "await agent.invoke({\n", " messages: [\n", - " new HumanMessage(\"I'm Nemo!\"),\n", - " new AIMessage(\"Hello Nemo! How can I assist you today?\"),\n", - " new HumanMessage(\"What is my name?\"),\n", - " ],\n", - "});" + " { role: \"user\", content: \"I'm Nemo!\" },\n", + " { role: \"user\", content: \"Hello Nemo! How can I assist you today?\" },\n", + " { role: \"user\", content: \"What is my name?\" }\n", + " ]\n", + "})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "If preferred, you can also wrap the agent executor in a [`RunnableWithMessageHistory`](/docs/how_to/message_history/) class to internally manage history messages. Let's redeclare it this way:" + "If preferred, you can also add memory to the LangGraph agent to manage the history of messages. 
Let's redeclare it this way:" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "const agent2 = await createToolCallingAgent({\n", - " llm,\n", - " tools,\n", - " prompt,\n", - "});\n", + "import { MemorySaver } from \"@langchain/langgraph\"\n", "\n", - "const agentExecutor2 = new AgentExecutor({ agent: agent2, tools });" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then, because our agent executor has multiple outputs, we also have to set the `outputMessagesKey` property when initializing the wrapper:\n" + "// highlight-start\n", + "const memory = new MemorySaver()\n", + "const agent2 = createReactAgent({ llm, tools, messageModifier: prompt, checkpointSaver: memory })\n", + "// highlight-end" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{\n", - " messages: [\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " }\n", - " ],\n", - " output: \u001b[32m\"Hi Nemo! It's great to meet you. How can I assist you today?\"\u001b[39m\n", - "}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"a4a4f663-8192-4179-afcc-88d9d186aa80\",\n", + " \"content\": \"I'm Nemo!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTKi4tBzOWMh3hgA46xXo7bJzb8r\",\n", + " \"content\": \"How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 93,\n", + " \"totalTokens\": 101\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 93,\n", + " \"output_tokens\": 8,\n", + " \"total_tokens\": 101\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "import { ChatMessageHistory } from \"langchain/stores/message/in_memory\";\n", - "import { RunnableWithMessageHistory } from \"@langchain/core/runnables\";\n", - "\n", - "const demoEphemeralChatMessageHistory = new ChatMessageHistory();\n", - "\n", - "const conversationalAgentExecutor = new RunnableWithMessageHistory({\n", - " runnable: agentExecutor2,\n", - " getMessageHistory: (_sessionId) => demoEphemeralChatMessageHistory,\n", - " inputMessagesKey: \"messages\",\n", - " outputMessagesKey: \"output\",\n", - "});\n", - "\n", - "await conversationalAgentExecutor.invoke(\n", - " { messages: [new HumanMessage(\"I'm Nemo!\")] },\n", - " { configurable: { sessionId: \"unused\" } }\n", - ");" + "await agent2.invoke({ messages: [{ role: \"user\", content: \"I'm Nemo!\" }]}, { configurable: { thread_id: \"1\" } })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then if we rerun our wrapped 
agent executor:" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{\n", - " messages: [\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"I'm Nemo!\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Hi Nemo! It's great to meet you. How can I assist you today?\"\u001b[39m,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Hi Nemo! It's great to meet you. How can I assist you today?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {},\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " usage_metadata: \u001b[90mundefined\u001b[39m\n", - " },\n", - " HumanMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"What is my name?\"\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"What is my name?\"\u001b[39m,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: {},\n", - " response_metadata: {}\n", - " }\n", - " ],\n", - " output: \u001b[32m\"Your name is Nemo!\"\u001b[39m\n", - "}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " messages: [\n", + " HumanMessage {\n", + " \"id\": \"c5fd303c-eb49-41a0-868e-bc8c5aa02cf6\",\n", + " \"content\": \"I'm Nemo!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTKi4tBzOWMh3hgA46xXo7bJzb8r\",\n", + " \"content\": \"How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 93,\n", + " \"totalTokens\": 101\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"id\": \"635b17b9-2ec7-412f-bf45-85d0e9944430\",\n", + " \"content\": \"What is my name?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTKjBbmFlPb5t37aJ8p4NtoHb8YG\",\n", + " \"content\": \"How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 8,\n", + " \"promptTokens\": 93,\n", + " \"totalTokens\": 101\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " 
\"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 93,\n", + " \"output_tokens\": 8,\n", + " \"total_tokens\": 101\n", + " }\n", + " }\n", + " ]\n", + "}\n" + ] } ], "source": [ - "await conversationalAgentExecutor.invoke(\n", - " { messages: [new HumanMessage(\"What is my name?\")] },\n", - " { configurable: { sessionId: \"unused\" } }\n", - ");" + "await agent2.invoke({ messages: [{ role: \"user\", content: \"What is my name?\" }]}, { configurable: { thread_id: \"1\" } })" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Next steps\n", + "This [LangSmith trace](https://smith.langchain.com/public/16cbcfa5-5ef1-4d4c-92c9-538a6e71f23d/r) shows what's going on under the hood.\n", + "\n", + "## Further reading\n", + "\n", + "For more on how to build agents, check these [LangGraph](https://langchain-ai.github.io/langgraphjs/) guides:\n", "\n", - "You've now learned how to create chatbots with tool-use capabilities.\n", + "* [agents conceptual guide](https://langchain-ai.github.io/langgraphjs/concepts/agentic_concepts/)\n", + "* [agents tutorials](https://langchain-ai.github.io/langgraphjs/tutorials/multi_agent/multi_agent_collaboration/)\n", + "* [createReactAgent](https://langchain-ai.github.io/langgraphjs/how-tos/create-react-agent/)\n", "\n", - "For more, check out the other guides in this section, including [how to add history to your chatbots](/docs/how_to/chatbots_memory)." + "For more on tool usage, you can also check out [this use case section](/docs/how_to#tools)." ] } ], @@ -469,14 +495,17 @@ "name": "deno" }, "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, "file_extension": ".ts", - "mimetype": "text/x.typescript", + "mimetype": "text/typescript", "name": "typescript", - "nb_converter": "script", - "pygments_lexer": "typescript", - "version": "5.3.3" + "version": "3.7.2" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docs/core_docs/docs/how_to/message_history.ipynb b/docs/core_docs/docs/how_to/message_history.ipynb new file mode 100644 index 000000000000..dbca922041ff --- /dev/null +++ b/docs/core_docs/docs/how_to/message_history.ipynb @@ -0,0 +1,586 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "8165bd4c", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "keywords: [memory]\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "f47033eb", + "metadata": {}, + "source": [ + "# How to add message history\n", + "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "\n", + "- [Chaining runnables](/docs/how_to/sequence/)\n", + "- [Prompt templates](/docs/concepts/#prompt-templates)\n", + "- [Chat Messages](/docs/concepts/#message-types)\n", + "\n", + ":::\n", + "\n", + "```{=mdx}\n", + ":::note\n", + "\n", + "This guide previously covered the [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html) abstraction. 
You can access this version of the guide in the [v0.2 docs](https://js.langchain.com/v0.2/docs/how_to/message_history/).\n", + "\n", + "The LangGraph implementation offers a number of advantages over `RunnableWithMessageHistory`, including the ability to persist arbitrary components of an application's state (instead of only messages).\n", + "\n", + ":::\n", + "```\n", + "\n", + "\n", + "Passing conversation state into and out a chain is vital when building a chatbot. LangGraph implements a built-in persistence layer, allowing chain states to be automatically persisted in memory, or external backends such as SQLite, Postgres or Redis. Details can be found in the LangGraph persistence documentation.\n", + "\n", + "In this guide we demonstrate how to add persistence to arbitrary LangChain runnables by wrapping them in a minimal LangGraph application. This lets us persist the message history and other elements of the chain's state, simplifying the development of multi-turn applications. It also supports multiple threads, enabling a single application to interact separately with multiple users.\n", + "\n", + "## Setup\n", + "\n", + "```{=mdx}\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + " @langchain/core @langchain/langgraph\n", + "\n", + "```\n", + "\n", + "Let’s also set up a chat model that we’ll use for the below examples.\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "8a4e4708", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({\n", + " model: \"gpt-4o\",\n", + " temperature: 0,\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "1f6121bc-2080-4ccc-acf0-f77de4bc951d", + "metadata": {}, + "source": [ + "## Example: message inputs\n", + "\n", + "Adding memory to a [chat model](/docs/concepts/#chat-models) provides a simple example. Chat models accept a list of messages as input and output a message. LangGraph includes a built-in `MessagesState` that we can use for this purpose.\n", + "\n", + "Below, we:\n", + "1. Define the graph state to be a list of messages;\n", + "2. Add a single node to the graph that calls a chat model;\n", + "3. 
Compile the graph with an in-memory checkpointer to store messages between runs.\n", + "\n", + ":::info\n", + "\n", + "The output of a LangGraph application is its [state](https://langchain-ai.github.io/langgraphjs/concepts/low_level/).\n", + "\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f691a73a-a866-4354-9fff-8315605e2b8f", + "metadata": {}, + "outputs": [], + "source": [ + "import { START, END, MessagesAnnotation, StateGraph, MemorySaver } from \"@langchain/langgraph\";\n", + "\n", + "// Define the function that calls the model\n", + "const callModel = async (state: typeof MessagesAnnotation.State) => {\n", + " const response = await llm.invoke(state.messages);\n", + " // Update message history with response:\n", + " return { messages: response };\n", + "};\n", + "\n", + "// Define a new graph\n", + "const workflow = new StateGraph(MessagesAnnotation)\n", + " // Define the (single) node in the graph\n", + " .addNode(\"model\", callModel)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "// Add memory\n", + "const memory = new MemorySaver();\n", + "const app = workflow.compile({ checkpointer: memory });" + ] + }, + { + "cell_type": "markdown", + "id": "c0b396a8-f81e-4139-b4b2-75adf61d8179", + "metadata": {}, + "source": [ + "When we run the application, we pass in a configuration object that specifies a `thread_id`. This ID is used to distinguish conversational threads (e.g., between different users)." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "e4309511-2140-4d91-8f5f-ea3661e6d179", + "metadata": {}, + "outputs": [], + "source": [ + "import { v4 as uuidv4 } from \"uuid\";\n", + "\n", + "const config = { configurable: { thread_id: uuidv4() } }" + ] + }, + { + "cell_type": "markdown", + "id": "108c45a2-4971-4120-ba64-9a4305a414bb", + "metadata": {}, + "source": [ + "We can then invoke the application:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "72a5ff6c-501f-4151-8dd9-f600f70554be", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABTqCeKnMQmG9IH8dNF5vPjsgXtcM\",\n", + " \"content\": \"Hi Bob! How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 10,\n", + " \"promptTokens\": 12,\n", + " \"totalTokens\": 22\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 12,\n", + " \"output_tokens\": 10,\n", + " \"total_tokens\": 22\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "const input = [\n", + " {\n", + " role: \"user\",\n", + " content: \"Hi! I'm Bob.\",\n", + " }\n", + "]\n", + "const output = await app.invoke({ messages: input }, config)\n", + "// The output contains all messages in the state.\n", + "// This will long the last message in the conversation.\n", + "console.log(output.messages[output.messages.length - 1]);" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "5931fb35-0fac-40e7-8ac6-b14cb4e926cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABTqD5jrJXeKCpvoIDp47fvgw2OPn\",\n", + " \"content\": \"Your name is Bob. 
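`MemorySaver` keeps checkpoints in process memory, so they are lost when the application restarts. As noted earlier, LangGraph's persistence layer also supports external backends such as SQLite. A sketch under the assumption that the separately installed `@langchain/langgraph-checkpoint-sqlite` package (backed by `better-sqlite3`) is available; the file name is arbitrary:

```typescript
import { SqliteSaver } from "@langchain/langgraph-checkpoint-sqlite";

// Checkpoints written to this file survive process restarts, unlike MemorySaver.
const sqliteCheckpointer = SqliteSaver.fromConnString("checkpoints.db");
const persistentApp = workflow.compile({ checkpointer: sqliteCheckpointer });
```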
How can I help you today, Bob?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 14,\n", + " \"promptTokens\": 34,\n", + " \"totalTokens\": 48\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 34,\n", + " \"output_tokens\": 14,\n", + " \"total_tokens\": 48\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "const input2 = [\n", + " {\n", + " role: \"user\",\n", + " content: \"What's my name?\",\n", + " }\n", + "]\n", + "const output2 = await app.invoke({ messages: input2 }, config)\n", + "console.log(output2.messages[output2.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "id": "91de6d12-881d-4d23-a421-f2e3bf829b79", + "metadata": {}, + "source": [ + "Note that states are separated for different threads. If we issue the same query to a thread with a new `thread_id`, the model indicates that it does not know the answer:" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "6f12c26f-8913-4484-b2c5-b49eda2e6d7d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABTqDkctxwmXjeGOZpK6Km8jdCqdl\",\n", + " \"content\": \"I'm sorry, but I don't have access to personal information about users. How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 21,\n", + " \"promptTokens\": 11,\n", + " \"totalTokens\": 32\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 11,\n", + " \"output_tokens\": 21,\n", + " \"total_tokens\": 32\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "const config2 = { configurable: { thread_id: uuidv4() } }\n", + "const input3 = [\n", + " {\n", + " role: \"user\",\n", + " content: \"What's my name?\",\n", + " }\n", + "]\n", + "const output3 = await app.invoke({ messages: input3 }, config2)\n", + "console.log(output3.messages[output3.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "id": "6749ea95-3382-4843-bb96-cfececb9e4e5", + "metadata": {}, + "source": [ + "## Example: object inputs\n", + "\n", + "LangChain runnables often accept multiple inputs via separate keys in a single object argument. A common example is a prompt template with multiple parameters.\n", + "\n", + "Whereas before our runnable was a chat model, here we chain together a prompt template and chat model." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "6e7a402a-0994-4fc5-a607-fb990a248aa4", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromMessages([\n", + " [\"system\", \"Answer in {language}.\"],\n", + " new MessagesPlaceholder(\"messages\"),\n", + "])\n", + "\n", + "const runnable = prompt.pipe(llm);" + ] + }, + { + "cell_type": "markdown", + "id": "f83107bd-ae61-45e1-a57e-94ab043aad4b", + "metadata": {}, + "source": [ + "For this scenario, we define the graph state to include these parameters (in addition to the message history). 
We then define a single-node graph in the same way as before.\n", + "\n", + "Note that in the below state:\n", + "- Updates to the `messages` list will append messages;\n", + "- Updates to the `language` string will overwrite the string." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "267429ea-be0f-4f80-8daf-c63d881a1436", + "metadata": {}, + "outputs": [], + "source": [ + "import { START, END, StateGraph, MemorySaver, MessagesAnnotation, Annotation } from \"@langchain/langgraph\";\n", + "\n", + "// Define the State\n", + "// highlight-next-line\n", + "const GraphAnnotation = Annotation.Root({\n", + " // highlight-next-line\n", + " language: Annotation(),\n", + " // Spread `MessagesAnnotation` into the state to add the `messages` field.\n", + " // highlight-next-line\n", + " ...MessagesAnnotation.spec,\n", + "})\n", + "\n", + "\n", + "// Define the function that calls the model\n", + "const callModel2 = async (state: typeof GraphAnnotation.State) => {\n", + " const response = await runnable.invoke(state);\n", + " // Update message history with response:\n", + " return { messages: [response] };\n", + "};\n", + "\n", + "const workflow2 = new StateGraph(GraphAnnotation)\n", + " .addNode(\"model\", callModel2)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "const app2 = workflow2.compile({ checkpointer: new MemorySaver() });" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "f3844fb4-58d7-43c8-b427-6d9f64d7411b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABTqFnCASRB5UhZ7XAbbf5T0Bva4U\",\n", + " \"content\": \"Lo siento, pero no tengo suficiente información para saber tu nombre. ¿Cómo te llamas?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 19,\n", + " \"totalTokens\": 38\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 19,\n", + " \"output_tokens\": 19,\n", + " \"total_tokens\": 38\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "const config3 = { configurable: { thread_id: uuidv4() } }\n", + "const input4 = {\n", + " messages: [\n", + " {\n", + " role: \"user\",\n", + " content: \"What's my name?\",\n", + " }\n", + " ],\n", + " language: \"Spanish\",\n", + "} \n", + "const output4 = await app2.invoke(input4, config3)\n", + "console.log(output4.messages[output4.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "id": "7df47824-ef18-4a6e-a416-345ec9203f88", + "metadata": {}, + "source": [ + "## Managing message history\n", + "\n", + "The message history (and other elements of the application state) can be accessed via `.getState`:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "1cbd6d82-43c1-4d11-98af-5c3ad9cd9b3b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Language: Spanish\n", + "[\n", + " HumanMessage {\n", + " \"content\": \"What's my name?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTqFnCASRB5UhZ7XAbbf5T0Bva4U\",\n", + " \"content\": \"Lo siento, pero no tengo suficiente información para saber tu nombre. 
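Because `language` lives in the checkpointed state rather than being re-supplied on every call, a follow-up turn on the same thread can omit it entirely and the stored value still applies. A quick sketch (not part of the recorded outputs above or below):

```typescript
// Only `messages` is provided here; the thread's stored `language` ("Spanish")
// is read back from the checkpoint and passed to the prompt unchanged.
const followUpInput = {
  messages: [{ role: "user", content: "My name is Alice." }],
};
const followUpOutput = await app2.invoke(followUpInput, config3);
console.log(followUpOutput.messages[followUpOutput.messages.length - 1].content);
```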
¿Cómo te llamas?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 19,\n", + " \"totalTokens\": 38\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const state = (await app2.getState(config3)).values\n", + "\n", + "console.log(`Language: ${state.language}`);\n", + "console.log(state.messages)" + ] + }, + { + "cell_type": "markdown", + "id": "acfbccda-0bd6-4c4d-ae6e-8118520314e1", + "metadata": {}, + "source": [ + "We can also update the state via `.updateState`. For example, we can manually append a new message:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "e98310d7-8ab1-461d-94a7-dd419494ab8d", + "metadata": {}, + "outputs": [], + "source": [ + "const _ = await app2.updateState(config3, { messages: [{ role: \"user\", content: \"test\" }]})" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "74ab3691-6f3b-49c5-aad0-2a90fc2a1e6a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Language: Spanish\n", + "[\n", + " HumanMessage {\n", + " \"content\": \"What's my name?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABTqFnCASRB5UhZ7XAbbf5T0Bva4U\",\n", + " \"content\": \"Lo siento, pero no tengo suficiente información para saber tu nombre. ¿Cómo te llamas?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 19,\n", + " \"totalTokens\": 38\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"content\": \"test\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const state2 = (await app2.getState(config3)).values\n", + "\n", + "console.log(`Language: ${state2.language}`);\n", + "console.log(state2.messages)" + ] + }, + { + "cell_type": "markdown", + "id": "e4a1ea00-d7ff-4f18-b9ec-9aec5909d027", + "metadata": {}, + "source": [ + "For details on managing state, including deleting messages, see the LangGraph documentation:\n", + "\n", + "- [How to delete messages](https://langchain-ai.github.io/langgraphjs/how-tos/delete-messages/)\n", + "- [How to view and update past graph state](https://langchain-ai.github.io/langgraphjs/how-tos/time-travel/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/how_to/message_history.mdx b/docs/core_docs/docs/how_to/message_history.mdx deleted file mode 100644 index 2712135ff482..000000000000 --- a/docs/core_docs/docs/how_to/message_history.mdx +++ /dev/null @@ -1,206 +0,0 @@ -# How to add message history - -:::info Prerequisites - -This guide assumes familiarity with the following concepts: - -- 
[LangChain Expression Language (LCEL)](/docs/concepts/#langchain-expression-language) -- [Chaining runnables](/docs/how_to/sequence/) -- [Configuring chain parameters at runtime](/docs/how_to/binding) -- [Prompt templates](/docs/concepts/#prompt-templates) -- [Chat Messages](/docs/concepts/#message-types) - -::: - -The `RunnableWithMessageHistory` lets us add message history to certain types of chains. - -Specifically, it can be used for any Runnable that takes as input one of - -- a sequence of [`BaseMessages`](/docs/concepts/#message-types) -- a dict with a key that takes a sequence of `BaseMessage` -- a dict with a key that takes the latest message(s) as a string or sequence of `BaseMessage`, and a separate key that takes historical messages - -And returns as output one of - -- a string that can be treated as the contents of an `AIMessage` -- a sequence of `BaseMessage` -- a dict with a key that contains a sequence of `BaseMessage` - -Let's take a look at some examples to see how it works. - -## Setup - -We'll use Upstash to store our chat message histories and Anthropic's claude-2 model so we'll need to install the following dependencies: - -```bash npm2yarn -npm install @langchain/anthropic @langchain/community @langchain/core @upstash/redis -``` - -You'll need to set environment variables for `ANTHROPIC_API_KEY` and grab your Upstash REST url and secret token. - -### [LangSmith](https://smith.langchain.com/) - -LangSmith is especially useful for something like message history injection, where it can be hard to otherwise understand what the inputs are to various parts of the chain. - -Note that LangSmith is not needed, but it is helpful. -If you do want to use LangSmith, after you sign up at the link above, make sure to uncoment the below and set your environment variables to start logging traces: - -```bash -export LANGCHAIN_TRACING_V2="true" -export LANGCHAIN_API_KEY="" - -# Reduce tracing latency if you are not in a serverless environment -# export LANGCHAIN_CALLBACKS_BACKGROUND=true -``` - -Let's create a simple runnable that takes a dict as input and returns a `BaseMessage`. - -In this case the `"question"` key in the input represents our input message, and the `"history"` key is where our historical messages will be injected. - -```typescript -import { - ChatPromptTemplate, - MessagesPlaceholder, -} from "@langchain/core/prompts"; -import { ChatAnthropic } from "@langchain/anthropic"; -import { UpstashRedisChatMessageHistory } from "@langchain/community/stores/message/upstash_redis"; -// For demos, you can also use an in-memory store: -// import { ChatMessageHistory } from "langchain/stores/message/in_memory"; - -const prompt = ChatPromptTemplate.fromMessages([ - ["system", "You're an assistant who's good at {ability}"], - new MessagesPlaceholder("history"), - ["human", "{question}"], -]); - -const chain = prompt.pipe( - new ChatAnthropic({ model: "claude-3-sonnet-20240229" }) -); -``` - -### Adding message history - -To add message history to our original chain we wrap it in the `RunnableWithMessageHistory` class. - -Crucially, we also need to define a `getMessageHistory()` method that takes a `sessionId` string and based on it returns a `BaseChatMessageHistory`. Given the same input, this method should return an equivalent output. - -In this case, we'll also want to specify `inputMessagesKey` (the key to be treated as the latest input message) and `historyMessagesKey` (the key to add historical messages to). 
- -```typescript -import { RunnableWithMessageHistory } from "@langchain/core/runnables"; - -const chainWithHistory = new RunnableWithMessageHistory({ - runnable: chain, - getMessageHistory: (sessionId) => - new UpstashRedisChatMessageHistory({ - sessionId, - config: { - url: process.env.UPSTASH_REDIS_REST_URL!, - token: process.env.UPSTASH_REDIS_REST_TOKEN!, - }, - }), - inputMessagesKey: "question", - historyMessagesKey: "history", -}); -``` - -## Invoking with config - -Whenever we call our chain with message history, we need to include an additional config object that contains the `session_id` - -```typescript -{ - configurable: { - sessionId: ""; - } -} -``` - -Given the same configuration, our chain should be pulling from the same chat message history. - -```typescript -const result = await chainWithHistory.invoke( - { - ability: "math", - question: "What does cosine mean?", - }, - { - configurable: { - sessionId: "foobarbaz", - }, - } -); - -console.log(result); - -/* - AIMessage { - content: 'Cosine refers to one of the basic trigonometric functions. Specifically:\n' + - '\n' + - '- Cosine is one of the three main trigonometric functions, along with sine and tangent. It is often abbreviated as cos.\n' + - '\n' + - '- For a right triangle with sides a, b, and c (where c is the hypotenuse), cosine represents the ratio of the length of the adjacent side (a) to the length of the hypotenuse (c). So cos(A) = a/c, where A is the angle opposite side a.\n' + - '\n' + - '- On the Cartesian plane, cosine represents the x-coordinate of a point on the unit circle for a given angle. So if you take an angle θ on the unit circle, the cosine of θ gives you the x-coordinate of where the terminal side of that angle intersects the circle.\n' + - '\n' + - '- The cosine function has a periodic waveform that oscillates between 1 and -1. Its graph forms a cosine wave.\n' + - '\n' + - 'So in essence, cosine helps relate an angle in a right triangle to the ratio of two of its sides. Along with sine and tangent, it is foundational to trigonometry and mathematical modeling of periodic functions.', - name: undefined, - additional_kwargs: { - id: 'msg_01QnnAkKEz7WvhJrwLWGbLBm', - type: 'message', - role: 'assistant', - model: 'claude-3-sonnet-20240229', - stop_reason: 'end_turn', - stop_sequence: null - } - } -*/ - -const result2 = await chainWithHistory.invoke( - { - ability: "math", - question: "What's its inverse?", - }, - { - configurable: { - sessionId: "foobarbaz", - }, - } -); - -console.log(result2); - -/* - AIMessage { - content: 'The inverse of the cosine function is the arcsine or inverse sine function, often written as sin−1(x) or sin^{-1}(x).\n' + - '\n' + - 'Some key properties of the inverse cosine function:\n' + - '\n' + - '- It accepts values between -1 and 1 as inputs and returns angles from 0 to π radians (0 to 180 degrees). This is the inverse of the regular cosine function, which takes angles and returns the cosine ratio.\n' + - '\n' + - '- It is also called cos−1(x) or cos^{-1}(x) (read as "cosine inverse of x").\n' + - '\n' + - '- The notation sin−1(x) is usually preferred over cos−1(x) since it relates more directly to the unit circle definition of cosine. sin−1(x) gives the angle whose sine equals x.\n' + - '\n' + - '- The arcsine function is one-to-one on the domain [-1, 1]. This means every output angle maps back to exactly one input ratio x. 
This one-to-one mapping is what makes it the mathematical inverse of cosine.\n' + - '\n' + - 'So in summary, arcsine or inverse sine, written as sin−1(x) or sin^{-1}(x), gives you the angle whose cosine evaluates to the input x, undoing the cosine function. It is used throughout trigonometry and calculus.', - additional_kwargs: { - id: 'msg_01PYRhpoUudApdJvxug6R13W', - type: 'message', - role: 'assistant', - model: 'claude-3-sonnet-20240229', - stop_reason: 'end_turn', - stop_sequence: null - } - } -*/ -``` - -:::tip -[Langsmith trace](https://smith.langchain.com/public/50377a89-d0b8-413b-8cd7-8e6618835e00/r) -::: - -Looking at the Langsmith trace for the second call, we can see that when constructing the prompt, a "history" variable has been injected which is a list of two messages (our first input and first output). diff --git a/docs/core_docs/docs/how_to/qa_chat_history_how_to.ipynb b/docs/core_docs/docs/how_to/qa_chat_history_how_to.ipynb index cead1afc3fa2..4ed8e4bd849e 100644 --- a/docs/core_docs/docs/how_to/qa_chat_history_how_to.ipynb +++ b/docs/core_docs/docs/how_to/qa_chat_history_how_to.ipynb @@ -4,24 +4,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# How to add chat history to a question-answering chain\n", + "# How to add chat history\n", "\n", - ":::info Prerequisites\n", "\n", - "This guide assumes familiarity with the following:\n", + ":::note\n", "\n", - "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", + "This tutorial previously built a chatbot using [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html). You can access this version of the tutorial in the [v0.2 docs](https://js.langchain.com/v0.2/docs/how_to/qa_chat_history_how_to/).\n", + "\n", + "The LangGraph implementation offers a number of advantages over `RunnableWithMessageHistory`, including the ability to persist arbitrary components of an application's state (instead of only messages).\n", "\n", ":::\n", "\n", "In many Q&A applications we want to allow the user to have a back-and-forth conversation, meaning the application needs some sort of \"memory\" of past questions and answers, and some logic for incorporating those into its current thinking.\n", "\n", - "In this guide we focus on **adding logic for incorporating historical messages, and NOT on chat history management.** Chat history management is [covered here](/docs/how_to/message_history).\n", + "In this guide we focus on **adding logic for incorporating historical messages.**\n", + "\n", + "This is largely a condensed version of the [Conversational RAG tutorial](/docs/tutorials/qa_chat_history).\n", + "\n", + "We will cover two approaches:\n", "\n", - "We'll work off of the Q&A app we built over the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng. We'll need to update two things about our existing app:\n", + "1. [Chains](/docs/how_to/qa_chat_history_how_to#chains), in which we always execute a retrieval step;\n", + "2. [Agents](/docs/how_to/qa_chat_history_how_to#agents), in which we give an LLM discretion over whether and how to execute a retrieval step (or multiple steps).\n", "\n", - "1. **Prompt**: Update our prompt to support historical messages as an input.\n", - "2. **Contextualizing questions**: Add a sub-chain that takes the latest user question and reformulates it in the context of the chat history. 
This is needed in case the latest question references some context from past messages. For example, if a user asks a follow-up question like \"Can you elaborate on the second point?\", this cannot be understood without the context of the previous message. Therefore we can't effectively perform retrieval with a question like this." + "For the external knowledge source, we will use the same [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng from the [RAG tutorial](/docs/tutorials/rag)." ] }, { @@ -36,7 +41,7 @@ "We’ll use the following packages:\n", "\n", "```bash\n", - "npm install --save langchain @langchain/openai cheerio\n", + "npm install --save langchain @langchain/openai langchain cheerio uuid\n", "```\n", "\n", "We need to set environment variable `OPENAI_API_KEY`:\n", @@ -66,6 +71,43 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chains {#chains}\n", + "\n", + "In a conversational RAG application, queries issued to the retriever should be informed by the context of the conversation. LangChain provides a [createHistoryAwareRetriever](https://api.js.langchain.com/functions/langchain.chains_history_aware_retriever.createHistoryAwareRetriever.html) constructor to simplify this. It constructs a chain that accepts keys `input` and `chat_history` as input, and has the same output schema as a retriever. `createHistoryAwareRetriever` requires as inputs: \n", + "\n", + "1. LLM;\n", + "2. Retriever;\n", + "3. Prompt.\n", + "\n", + "First we obtain these objects:\n", + "\n", + "### LLM\n", + "\n", + "We can use any supported chat model:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\"\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({ model: \"gpt-4o\" });" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -75,21 +117,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "import \"cheerio\";\n", "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", "import { MemoryVectorStore } from \"langchain/vectorstores/memory\"\n", - "import { OpenAIEmbeddings, ChatOpenAI } from \"@langchain/openai\";\n", - "import { pull } from \"langchain/hub\";\n", - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "import { RunnableSequence, RunnablePassthrough } from \"@langchain/core/runnables\";\n", - "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", - "\n", - "import { createStuffDocumentsChain } from \"langchain/chains/combine_documents\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", "\n", "const loader = new CheerioWebBaseLoader(\n", " \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\n", @@ -102,14 +137,112 @@ "const vectorStore = await MemoryVectorStore.fromDocuments(splits, new OpenAIEmbeddings());\n", "\n", "// Retrieve and generate using the relevant snippets of the blog.\n", - "const retriever = vectorStore.asRetriever();\n", - "// Tip - you can edit this!\n", - "const prompt = await pull(\"rlm/rag-prompt\");\n", - "const llm = new ChatOpenAI({ model: \"gpt-3.5-turbo\", temperature: 0 });\n", - 
"const ragChain = await createStuffDocumentsChain({\n", + "const retriever = vectorStore.asRetriever();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prompt\n", + "\n", + "We'll use a prompt that includes a `MessagesPlaceholder` variable under the name \"chat_history\". This allows us to pass in a list of Messages to the prompt using the \"chat_history\" input key, and these messages will be inserted after the system message and before the human message containing the latest question." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n", + "\n", + "const contextualizeQSystemPrompt = (\n", + " \"Given a chat history and the latest user question \" +\n", + " \"which might reference context in the chat history, \" +\n", + " \"formulate a standalone question which can be understood \" +\n", + " \"without the chat history. Do NOT answer the question, \" +\n", + " \"just reformulate it if needed and otherwise return it as is.\"\n", + ")\n", + "\n", + "const contextualizeQPrompt = ChatPromptTemplate.fromMessages(\n", + " [\n", + " [\"system\", contextualizeQSystemPrompt],\n", + " new MessagesPlaceholder(\"chat_history\"),\n", + " [\"human\", \"{input}\"],\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assembling the chain\n", + "\n", + "We can then instantiate the history-aware retriever:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import { createHistoryAwareRetriever } from \"langchain/chains/history_aware_retriever\";\n", + "\n", + "const historyAwareRetriever = await createHistoryAwareRetriever({\n", " llm,\n", - " prompt,\n", - " outputParser: new StringOutputParser(),\n", + " retriever,\n", + " rephrasePrompt: contextualizeQPrompt\n", + "});\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This chain prepends a rephrasing of the input query to our retriever, so that the retrieval incorporates the context of the conversation.\n", + "\n", + "Now we can build our full QA chain.\n", + "\n", + "As in the [RAG tutorial](/docs/tutorials/rag), we will use [createStuffDocumentsChain](https://api.js.langchain.com/functions/langchain.chains_combine_documents.createStuffDocumentsChain.html) to generate a `questionAnswerChain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n", + "\n", + "We build our final `ragChain` with [createRetrievalChain](https://api.js.langchain.com/functions/langchain.chains_retrieval.createRetrievalChain.html). This chain applies the `historyAwareRetriever` and `questionAnswerChain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import { createStuffDocumentsChain } from \"langchain/chains/combine_documents\";\n", + "import { createRetrievalChain } from \"langchain/chains/retrieval\";\n", + "\n", + "const systemPrompt = \n", + " \"You are an assistant for question-answering tasks. 
\" +\n", + " \"Use the following pieces of retrieved context to answer \" +\n", + " \"the question. If you don't know the answer, say that you \" +\n", + " \"don't know. Use three sentences maximum and keep the \" +\n", + " \"answer concise.\" +\n", + " \"\\n\\n\" +\n", + " \"{context}\";\n", + "\n", + "const qaPrompt = ChatPromptTemplate.fromMessages([\n", + " [\"system\", systemPrompt],\n", + " new MessagesPlaceholder(\"chat_history\"),\n", + " [\"human\", \"{input}\"],\n", + "]);\n", + "\n", + "const questionAnswerChain = await createStuffDocumentsChain({\n", + " llm,\n", + " prompt: qaPrompt,\n", + "});\n", + "\n", + "const ragChain = await createRetrievalChain({\n", + " retriever: historyAwareRetriever,\n", + " combineDocsChain: questionAnswerChain,\n", "});" ] }, @@ -117,278 +250,885 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's see what this prompt actually looks like" + "### Stateful Management of chat history\n", + "\n", + "We have added application logic for incorporating chat history, but we are still manually plumbing it through our application. In production, the Q&A application we usually persist the chat history into a database, and be able to read and update it appropriately.\n", + "\n", + "[LangGraph](https://langchain-ai.github.io/langgraphjs/) implements a built-in [persistence layer](https://langchain-ai.github.io/langgraphjs/concepts/persistence/), making it ideal for chat applications that support multiple conversational turns.\n", + "\n", + "Wrapping our chat model in a minimal LangGraph application allows us to automatically persist the message history, simplifying the development of multi-turn applications.\n", + "\n", + "LangGraph comes with a simple [in-memory checkpointer](https://langchain-ai.github.io/langgraphjs/reference/classes/checkpoint.MemorySaver.html), which we use below. See its documentation for more detail, including how to use different persistence backends (e.g., SQLite or Postgres).\n", + "\n", + "For a detailed walkthrough of how to manage message history, head to the How to add message history (memory) guide." 
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import { AIMessage, BaseMessage, HumanMessage } from \"@langchain/core/messages\";\n", + "import { StateGraph, START, END, MemorySaver, messagesStateReducer, Annotation } from \"@langchain/langgraph\";\n", + "\n", + "// Define the State interface\n", + "const GraphAnnotation = Annotation.Root({\n", + " input: Annotation(),\n", + " chat_history: Annotation({\n", + " reducer: messagesStateReducer,\n", + " default: () => [],\n", + " }),\n", + " context: Annotation(),\n", + " answer: Annotation(),\n", + "})\n", + "\n", + "// Define the call_model function\n", + "async function callModel(state: typeof GraphAnnotation.State) {\n", + " const response = await ragChain.invoke(state);\n", + " return {\n", + " chat_history: [\n", + " new HumanMessage(state.input),\n", + " new AIMessage(response.answer),\n", + " ],\n", + " context: response.context,\n", + " answer: response.answer,\n", + " };\n", + "}\n", + "\n", + "// Create the workflow\n", + "const workflow = new StateGraph(GraphAnnotation)\n", + " .addNode(\"model\", callModel)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "// Compile the graph with a checkpointer object\n", + "const memory = new MemorySaver();\n", + "const app = workflow.compile({ checkpointer: memory });" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n", - "Question: {question} \n", - "Context: {context} \n", - "Answer:\n" + "Task Decomposition is the process of breaking down a complicated task into smaller, simpler, and more manageable steps. Techniques like Chain of Thought (CoT) and Tree of Thoughts expand on this by enabling agents to think step by step or explore multiple reasoning possibilities at each step. This allows for a more structured and interpretable approach to handling complex tasks.\n" ] } ], "source": [ - "console.log(prompt.promptMessages.map((msg) => msg.prompt.template).join(\"\\n\"));" + "import { v4 as uuidv4 } from \"uuid\";\n", + "\n", + "const threadId = uuidv4();\n", + "const config = { configurable: { thread_id: threadId } };\n", + "\n", + "const result = await app.invoke(\n", + " { input: \"What is Task Decomposition?\" },\n", + " config,\n", + ")\n", + "console.log(result.answer);" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"Task Decomposition involves breaking down complex tasks into smaller and simpler steps to make them \"\u001b[39m... 
243 more characters" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "One way of doing task decomposition is by using an LLM with simple prompting, such as asking \"Steps for XYZ.\\n1.\" or \"What are the subgoals for achieving XYZ?\" This method leverages direct prompts to guide the model in breaking down tasks.\n" + ] } ], "source": [ - "await ragChain.invoke({\n", - " context: await retriever.invoke(\"What is Task Decomposition?\"),\n", - " question: \"What is Task Decomposition?\"\n", - "});" + "const result2 = await app.invoke(\n", + " { input: \"What is one way of doing it?\" },\n", + " config,\n", + ")\n", + "console.log(result2.answer);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Contextualizing the question\n", + "The conversation history can be inspected via the state of the application:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HumanMessage {\n", + " \"content\": \"What is Task Decomposition?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + "}\n", + "AIMessage {\n", + " \"content\": \"Task Decomposition is the process of breaking down a complicated task into smaller, simpler, and more manageable steps. Techniques like Chain of Thought (CoT) and Tree of Thoughts expand on this by enabling agents to think step by step or explore multiple reasoning possibilities at each step. This allows for a more structured and interpretable approach to handling complex tasks.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + "}\n", + "HumanMessage {\n", + " \"content\": \"What is one way of doing it?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + "}\n", + "AIMessage {\n", + " \"content\": \"One way of doing task decomposition is by using an LLM with simple prompting, such as asking \\\"Steps for XYZ.\\\\n1.\\\" or \\\"What are the subgoals for achieving XYZ?\\\" This method leverages direct prompts to guide the model in breaking down tasks.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + "}\n" + ] + } + ], + "source": [ + "const chatHistory = (await app.getState(config)).values.chat_history;\n", + "for (const message of chatHistory) {\n", + " console.log(message);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tying it together\n", "\n", - "First we'll need to define a sub-chain that takes historical messages and the latest user question, and reformulates the question if it makes reference to any information in the historical information.\n", + "![](../../static/img/conversational_retrieval_chain.png)\n", "\n", - "We'll use a prompt that includes a `MessagesPlaceholder` variable under the name \"chat_history\". This allows us to pass in a list of Messages to the prompt using the \"chat_history\" input key, and these messages will be inserted after the system message and before the human message containing the latest question." 
+ "For convenience, we tie together all of the necessary steps in a single code cell:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Task Decomposition is the process of breaking a complicated task into smaller, simpler steps to enhance model performance on complex tasks. Techniques like Chain of Thought (CoT) and Tree of Thoughts (ToT) are used for this, with CoT focusing on step-by-step thinking and ToT exploring multiple reasoning possibilities at each step. Decomposition can be carried out by the LLM itself, using task-specific instructions, or through human inputs.\n", + "One way of doing task decomposition is by prompting the LLM with simple instructions such as \"Steps for XYZ.\\n1.\" or \"What are the subgoals for achieving XYZ?\" This encourages the model to break down the task into smaller, manageable steps on its own.\n" + ] + } + ], "source": [ + "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", + "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\"\n", + "import { OpenAIEmbeddings, ChatOpenAI } from \"@langchain/openai\";\n", "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n", + "import { createHistoryAwareRetriever } from \"langchain/chains/history_aware_retriever\";\n", + "import { createStuffDocumentsChain } from \"langchain/chains/combine_documents\";\n", + "import { createRetrievalChain } from \"langchain/chains/retrieval\";\n", + "import { AIMessage, BaseMessage, HumanMessage } from \"@langchain/core/messages\";\n", + "import { StateGraph, START, END, MemorySaver, messagesStateReducer, Annotation } from \"@langchain/langgraph\";\n", + "import { v4 as uuidv4 } from \"uuid\";\n", "\n", - "const contextualizeQSystemPrompt = `Given a chat history and the latest user question\n", - "which might reference context in the chat history, formulate a standalone question\n", - "which can be understood without the chat history. Do NOT answer the question,\n", - "just reformulate it if needed and otherwise return it as is.`;\n", + "const llm2 = new ChatOpenAI({ model: \"gpt-4o\" });\n", "\n", - "const contextualizeQPrompt = ChatPromptTemplate.fromMessages([\n", - " [\"system\", contextualizeQSystemPrompt],\n", + "const loader2 = new CheerioWebBaseLoader(\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\n", + ");\n", + "\n", + "const docs2 = await loader2.load();\n", + "\n", + "const textSplitter2 = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });\n", + "const splits2 = await textSplitter2.splitDocuments(docs2);\n", + "const vectorStore2 = await MemoryVectorStore.fromDocuments(splits2, new OpenAIEmbeddings());\n", + "\n", + "// Retrieve and generate using the relevant snippets of the blog.\n", + "const retriever2 = vectorStore2.asRetriever();\n", + "\n", + "const contextualizeQSystemPrompt2 =\n", + " \"Given a chat history and the latest user question \" +\n", + " \"which might reference context in the chat history, \" +\n", + " \"formulate a standalone question which can be understood \" +\n", + " \"without the chat history. 
Do NOT answer the question, \" +\n", + " \"just reformulate it if needed and otherwise return it as is.\";\n", + "\n", + "const contextualizeQPrompt2 = ChatPromptTemplate.fromMessages(\n", + " [\n", + " [\"system\", contextualizeQSystemPrompt2],\n", + " new MessagesPlaceholder(\"chat_history\"),\n", + " [\"human\", \"{input}\"],\n", + " ]\n", + ")\n", + "\n", + "const historyAwareRetriever2 = await createHistoryAwareRetriever({\n", + " llm: llm2,\n", + " retriever: retriever2,\n", + " rephrasePrompt: contextualizeQPrompt2\n", + "});\n", + "\n", + "const systemPrompt2 = \n", + " \"You are an assistant for question-answering tasks. \" +\n", + " \"Use the following pieces of retrieved context to answer \" +\n", + " \"the question. If you don't know the answer, say that you \" +\n", + " \"don't know. Use three sentences maximum and keep the \" +\n", + " \"answer concise.\" +\n", + " \"\\n\\n\" +\n", + " \"{context}\";\n", + "\n", + "const qaPrompt2 = ChatPromptTemplate.fromMessages([\n", + " [\"system\", systemPrompt2],\n", " new MessagesPlaceholder(\"chat_history\"),\n", - " [\"human\", \"{question}\"]\n", + " [\"human\", \"{input}\"],\n", "]);\n", - "const contextualizeQChain = contextualizeQPrompt.pipe(llm).pipe(new StringOutputParser());" + "\n", + "const questionAnswerChain2 = await createStuffDocumentsChain({\n", + " llm: llm2,\n", + " prompt: qaPrompt2,\n", + "});\n", + "\n", + "const ragChain2 = await createRetrievalChain({\n", + " retriever: historyAwareRetriever2,\n", + " combineDocsChain: questionAnswerChain2,\n", + "});\n", + "\n", + "// Define the State interface\n", + "const GraphAnnotation2 = Annotation.Root({\n", + " input: Annotation(),\n", + " chat_history: Annotation({\n", + " reducer: messagesStateReducer,\n", + " default: () => [],\n", + " }),\n", + " context: Annotation(),\n", + " answer: Annotation(),\n", + "})\n", + "\n", + "// Define the call_model function\n", + "async function callModel2(state: typeof GraphAnnotation2.State) {\n", + " const response = await ragChain2.invoke(state);\n", + " return {\n", + " chat_history: [\n", + " new HumanMessage(state.input),\n", + " new AIMessage(response.answer),\n", + " ],\n", + " context: response.context,\n", + " answer: response.answer,\n", + " };\n", + "}\n", + "\n", + "// Create the workflow\n", + "const workflow2 = new StateGraph(GraphAnnotation2)\n", + " .addNode(\"model\", callModel2)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "// Compile the graph with a checkpointer object\n", + "const memory2 = new MemorySaver();\n", + "const app2 = workflow2.compile({ checkpointer: memory2 });\n", + "\n", + "const threadId2 = uuidv4();\n", + "const config2 = { configurable: { thread_id: threadId2 } };\n", + "\n", + "const result3 = await app2.invoke(\n", + " { input: \"What is Task Decomposition?\" },\n", + " config2,\n", + ")\n", + "console.log(result3.answer);\n", + "\n", + "const result4 = await app2.invoke(\n", + " { input: \"What is one way of doing it?\" },\n", + " config2,\n", + ")\n", + "console.log(result4.answer);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Using this chain we can ask follow-up questions that reference past messages and have them reformulated into standalone questions:" + "## Agents {#agents}\n", + "\n", + "Agents leverage the reasoning capabilities of LLMs to make decisions during execution. Using agents allow you to offload some discretion over the retrieval process. 
Although their behavior is less predictable than chains, they offer some advantages in this context:\n", + "- Agents generate the input to the retriever directly, without necessarily needing us to explicitly build in contextualization, as we did above;\n", + "- Agents can execute multiple retrieval steps in service of a query, or refrain from executing a retrieval step altogether (e.g., in response to a generic greeting from a user).\n", + "\n", + "### Retrieval tool\n", + "\n", + "Agents can access \"tools\" and manage their execution. In this case, we will convert our retriever into a LangChain tool to be wielded by the agent:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\u001b[32m'What is the definition of \"large\" in this context?'\u001b[39m" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "import { AIMessage, HumanMessage } from \"@langchain/core/messages\";\n", + "import { createRetrieverTool } from \"langchain/tools/retriever\";\n", "\n", - "await contextualizeQChain.invoke({\n", - " chat_history: [\n", - " new HumanMessage(\"What does LLM stand for?\"),\n", - " new AIMessage(\"Large language model\") \n", - " ],\n", - " question: \"What is meant by large\",\n", - "})" + "const tool = createRetrieverTool(\n", + " retriever,\n", + " {\n", + " name: \"blog_post_retriever\",\n", + " description: \"Searches and returns excerpts from the Autonomous Agents blog post.\",\n", + " }\n", + ")\n", + "const tools = [tool]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Chain with chat history\n", + "### Agent constructor\n", "\n", - "And now we can build our full QA chain. \n", + "Now that we have defined the tools and the LLM, we can create the agent. We will be using [LangGraph](/docs/concepts/#langgraph) to construct the agent. \n", + "Currently we are using a high level interface to construct the agent, but the nice thing about LangGraph is that this high-level interface is backed by a low-level, highly controllable API in case you want to modify the agent logic." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n", "\n", - "Notice we add some routing functionality to only run the \"condense question chain\" when our chat history isn't empty. Here we're taking advantage of the fact that if a function in an LCEL chain returns another chain, that chain will itself be invoked." + "const agentExecutor = createReactAgent({ llm, tools })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now try it out. Note that so far it is not stateful (we still need to add in memory)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "AIMessage {\n", - " lc_serializable: true,\n", - " lc_kwargs: {\n", - " content: \"Task decomposition involves breaking down a complex task into smaller and simpler steps to make it m\"... 
358 more characters,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: undefined, tool_calls: undefined },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \"langchain_core\", \"messages\" ],\n", - " content: \"Task decomposition involves breaking down a complex task into smaller and simpler steps to make it m\"... 358 more characters,\n", - " name: undefined,\n", - " additional_kwargs: { function_call: undefined, tool_calls: undefined },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: 83, promptTokens: 701, totalTokens: 784 },\n", - " finish_reason: \"stop\"\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}\n" + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7xlcJBGSKSp1GvgDY9FP8KvXxwB\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_Ev0nA6nzGwOeMC5upJUUxTuw\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 66,\n", + " \"totalTokens\": 85\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"blog_post_retriever\",\n", + " \"args\": {\n", + " \"query\": \"Task Decomposition\"\n", + " },\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_Ev0nA6nzGwOeMC5upJUUxTuw\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 66,\n", + " \"output_tokens\": 19,\n", + " \"total_tokens\": 85\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " tools: {\n", + " messages: [\n", + " ToolMessage {\n", + " \"content\": \"Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\n\\nTask decomposition can be done (1) by LLM with simple prompting like \\\"Steps for XYZ.\\\\n1.\\\", \\\"What are the subgoals for achieving XYZ?\\\", (2) by using task-specific instructions; e.g. \\\"Write a story outline.\\\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. 
In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\n\\nAgent System Overview\\n \\n Component One: Planning\\n \\n \\n Task Decomposition\\n \\n Self-Reflection\\n \\n \\n Component Two: Memory\\n \\n \\n Types of Memory\\n \\n Maximum Inner Product Search (MIPS)\\n \\n \\n Component Three: Tool Use\\n \\n Case Studies\\n \\n \\n Scientific Discovery Agent\\n \\n Generative Agents Simulation\\n \\n Proof-of-Concept Examples\\n \\n \\n Challenges\\n \\n Citation\\n \\n References\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\",\n", + " \"name\": \"blog_post_retriever\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_call_id\": \"call_Ev0nA6nzGwOeMC5upJUUxTuw\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7xmiPNPbMX2KvZKHM2oPfcoFMnY\",\n", + " \"content\": \"**Task Decomposition** involves breaking down a complicated or large task into smaller, more manageable subtasks. Here are some insights based on current techniques and research:\\n\\n1. **Chain of Thought (CoT)**:\\n - Introduced by Wei et al. (2022), this technique prompts the model to \\\"think step by step\\\".\\n - It helps decompose hard tasks into several simpler steps.\\n - Enhances the interpretability of the model's thought process.\\n\\n2. **Tree of Thoughts (ToT)**:\\n - An extension of CoT by Yao et al. (2023).\\n - Decomposes problems into multiple thought steps and generates several possibilities at each step.\\n - Utilizes tree structures through BFS (Breadth-First Search) or DFS (Depth-First Search) with evaluation by a classifier or majority vote.\\n\\n3. **Methods of Task Decomposition**:\\n - **Simple Prompting**: Asking the model directly, e.g., \\\"Steps for XYZ.\\\\n1.\\\" or \\\"What are the subgoals for achieving XYZ?\\\".\\n - **Task-Specific Instructions**: Tailoring instructions to the task, such as \\\"Write a story outline\\\" for writing a novel.\\n - **Human Inputs**: Receiving inputs from humans to refine the process.\\n\\n4. **LLM+P Approach**:\\n - Suggested by Liu et al. (2023), combines language models with an external classical planner.\\n - Uses Planning Domain Definition Language (PDDL) for long-horizon planning:\\n 1. Translates the problem into a PDDL problem.\\n 2. Requests an external planner to generate a PDDL plan.\\n 3. 
Translates the PDDL plan back into natural language.\\n - This method offloads the planning complexity to a specialized tool, especially relevant for domains utilizing robotic setups.\\n\\nTask Decomposition is a fundamental component of planning in autonomous agent systems, aiding in the efficient accomplishment of complex tasks by breaking them into smaller, actionable steps.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 411,\n", + " \"promptTokens\": 732,\n", + " \"totalTokens\": 1143\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 732,\n", + " \"output_tokens\": 411,\n", + " \"total_tokens\": 1143\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" ] - }, - { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Common ways of task decomposition include using simple prompting techniques like Chain of Thought (C\"\u001b[39m... 353 more characters,\n", - " tool_calls: [],\n", - " invalid_tool_calls: [],\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {}\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Common ways of task decomposition include using simple prompting techniques like Chain of Thought (C\"\u001b[39m... 353 more characters,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m },\n", - " response_metadata: {\n", - " tokenUsage: { completionTokens: \u001b[33m81\u001b[39m, promptTokens: \u001b[33m779\u001b[39m, totalTokens: \u001b[33m860\u001b[39m },\n", - " finish_reason: \u001b[32m\"stop\"\u001b[39m\n", - " },\n", - " tool_calls: [],\n", - " invalid_tool_calls: []\n", - "}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\"\n", - "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", - "import { formatDocumentsAsString } from \"langchain/util/document\";\n", + "const query = \"What is Task Decomposition?\"\n", "\n", - "const qaSystemPrompt = `You are an assistant for question-answering tasks.\n", - "Use the following pieces of retrieved context to answer the question.\n", - "If you don't know the answer, just say that you don't know.\n", - "Use three sentences maximum and keep the answer concise.\n", + "for await (const s of await agentExecutor.stream(\n", + " { messages: [{ role: \"user\", content: query }] },\n", + ")){\n", + " console.log(s)\n", + " console.log(\"----\")\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "LangGraph comes with built in persistence, so we don't need to use `ChatMessageHistory`! Rather, we can pass in a checkpointer to our LangGraph agent directly.\n", "\n", - "{context}`\n", + "Distinct conversations are managed by specifying a key for a conversation thread in the config object, as shown below." 
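As a small illustration of how thread keys separate conversations, here is a minimal sketch using the checkpointed agent created in the next cell (called `agentExecutorWithMemory` below as a stand-in name): giving each user or session its own `thread_id` keeps their histories fully independent.

```typescript
import { v4 as uuidv4 } from "uuid";

// Each thread_id maps to its own checkpointed message history.
const aliceConfig = { configurable: { thread_id: uuidv4() } };
const bobConfig = { configurable: { thread_id: uuidv4() } };

// `agentExecutorWithMemory` stands in for the checkpointed agent built below.
await agentExecutorWithMemory.invoke(
  { messages: [{ role: "user", content: "Hi! I'm Alice." }] },
  aliceConfig
);

// Bob's thread starts empty: the agent has no memory of Alice's messages here.
await agentExecutorWithMemory.invoke(
  { messages: [{ role: "user", content: "What was my name again?" }] },
  bobConfig
);
```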
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "import { MemorySaver } from \"@langchain/langgraph\";\n", "\n", - "const qaPrompt = ChatPromptTemplate.fromMessages([\n", - " [\"system\", qaSystemPrompt],\n", - " new MessagesPlaceholder(\"chat_history\"),\n", - " [\"human\", \"{question}\"]\n", - "]);\n", + "const memory3 = new MemorySaver();\n", "\n", - "const contextualizedQuestion = (input: Record) => {\n", - " if (\"chat_history\" in input) {\n", - " return contextualizeQChain;\n", - " }\n", - " return input.question;\n", - "};\n", - "\n", - "const ragChain = RunnableSequence.from([\n", - " RunnablePassthrough.assign({\n", - " context: async (input: Record) => {\n", - " if (\"chat_history\" in input) {\n", - " const chain = contextualizedQuestion(input);\n", - " return chain.pipe(retriever).pipe(formatDocumentsAsString);\n", - " }\n", - " return \"\";\n", - " },\n", - " }),\n", - " qaPrompt,\n", - " llm\n", - "]);\n", + "const agentExecutor2 = createReactAgent({ llm, tools, checkpointSaver: memory3 })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is all we need to construct a conversational RAG agent.\n", "\n", - "const chat_history = [];\n", + "Let's observe its behavior. Note that if we input a query that does not require a retrieval step, the agent does not execute one:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7y8P8AGHkxOwKpwMc3qj6r0skYr\",\n", + " \"content\": \"Hello, Bob! How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 12,\n", + " \"promptTokens\": 64,\n", + " \"totalTokens\": 76\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 64,\n", + " \"output_tokens\": 12,\n", + " \"total_tokens\": 76\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] + } + ], + "source": [ + "const threadId3 = uuidv4();\n", + "const config3 = { configurable: { thread_id: threadId3 } };\n", "\n", - "const question = \"What is task decomposition?\";\n", - "const aiMsg = await ragChain.invoke({ question, chat_history });\n", + "for await (const s of await agentExecutor2.stream({ messages: [{ role: \"user\", content: \"Hi! 
I'm bob\" }] }, config3)) {\n", + " console.log(s)\n", + " console.log(\"----\")\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further, if we input a query that does require a retrieval step, the agent generates the input to the tool:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7y8Do2IHJ2rnUvvMU3pTggmuZud\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_3tSaOZ3xdKY4miIJdvBMR80V\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 89,\n", + " \"totalTokens\": 108\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"blog_post_retriever\",\n", + " \"args\": {\n", + " \"query\": \"Task Decomposition\"\n", + " },\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_3tSaOZ3xdKY4miIJdvBMR80V\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 89,\n", + " \"output_tokens\": 19,\n", + " \"total_tokens\": 108\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " tools: {\n", + " messages: [\n", + " ToolMessage {\n", + " \"content\": \"Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\n\\nTask decomposition can be done (1) by LLM with simple prompting like \\\"Steps for XYZ.\\\\n1.\\\", \\\"What are the subgoals for achieving XYZ?\\\", (2) by using task-specific instructions; e.g. \\\"Write a story outline.\\\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. 
Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\n\\nAgent System Overview\\n \\n Component One: Planning\\n \\n \\n Task Decomposition\\n \\n Self-Reflection\\n \\n \\n Component Two: Memory\\n \\n \\n Types of Memory\\n \\n Maximum Inner Product Search (MIPS)\\n \\n \\n Component Three: Tool Use\\n \\n Case Studies\\n \\n \\n Scientific Discovery Agent\\n \\n Generative Agents Simulation\\n \\n Proof-of-Concept Examples\\n \\n \\n Challenges\\n \\n Citation\\n \\n References\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\",\n", + " \"name\": \"blog_post_retriever\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_call_id\": \"call_3tSaOZ3xdKY4miIJdvBMR80V\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7y9tpoTvM3lsrhoxCWkkerk9fb2\",\n", + " \"content\": \"Task decomposition is a methodology used to break down complex tasks into smaller, more manageable steps. Here’s an overview of various approaches to task decomposition:\\n\\n1. **Chain of Thought (CoT)**: This technique prompts a model to \\\"think step by step,\\\" which aids in transforming big tasks into multiple smaller tasks. This method enhances the model’s performance on complex tasks by making the problem more manageable and interpretable.\\n\\n2. **Tree of Thoughts (ToT)**: An extension of Chain of Thought, this approach explores multiple reasoning possibilities at each step, effectively creating a tree structure. The search process can be carried out using Breadth-First Search (BFS) or Depth-First Search (DFS), with each state evaluated by either a classifier or a majority vote.\\n\\n3. **Simple Prompting**: Involves straightforward instructions to decompose a task, such as starting with \\\"Steps for XYZ. 1.\\\" or asking \\\"What are the subgoals for achieving XYZ?\\\". This can also include task-specific instructions like \\\"Write a story outline\\\" for writing a novel.\\n\\n4. **LLM+P**: Combines Large Language Models (LLMs) with an external classical planner. The problem is translated into a Planning Domain Definition Language (PDDL) format, an external planner generates a plan, and then the plan is translated back into natural language. 
This approach highlights a synergy between modern AI techniques and traditional planning strategies.\\n\\nThese approaches allow complex problems to be approached and solved more efficiently by focusing on manageable sub-tasks.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 311,\n", + " \"promptTokens\": 755,\n", + " \"totalTokens\": 1066\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 755,\n", + " \"output_tokens\": 311,\n", + " \"total_tokens\": 1066\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] + } + ], + "source": [ + "const query2 = \"What is Task Decomposition?\"\n", "\n", - "console.log(aiMsg)\n", + "for await (const s of await agentExecutor2.stream({ messages: [{ role: \"user\", content: query2 }] }, config3)) {\n", + " console.log(s)\n", + " console.log(\"----\")\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Above, instead of inserting our query verbatim into the tool, the agent stripped unnecessary words like \"what\" and \"is\".\n", "\n", - "chat_history.push(aiMsg);\n", + "This same principle allows the agent to use the context of the conversation when necessary:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7yDE4rCOXTPZ3595GknUgVzASmt\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_cWnDZq2aloVtMB4KjZlTxHmZ\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 21,\n", + " \"promptTokens\": 1089,\n", + " \"totalTokens\": 1110\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"blog_post_retriever\",\n", + " \"args\": {\n", + " \"query\": \"common ways of task decomposition\"\n", + " },\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_cWnDZq2aloVtMB4KjZlTxHmZ\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 1089,\n", + " \"output_tokens\": 21,\n", + " \"total_tokens\": 1110\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " tools: {\n", + " messages: [\n", + " ToolMessage {\n", + " \"content\": \"Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. 
It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\n\\nTask decomposition can be done (1) by LLM with simple prompting like \\\"Steps for XYZ.\\\\n1.\\\", \\\"What are the subgoals for achieving XYZ?\\\", (2) by using task-specific instructions; e.g. \\\"Write a story outline.\\\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\n\\nAgent System Overview\\n \\n Component One: Planning\\n \\n \\n Task Decomposition\\n \\n Self-Reflection\\n \\n \\n Component Two: Memory\\n \\n \\n Types of Memory\\n \\n Maximum Inner Product Search (MIPS)\\n \\n \\n Component Three: Tool Use\\n \\n Case Studies\\n \\n \\n Scientific Discovery Agent\\n \\n Generative Agents Simulation\\n \\n Proof-of-Concept Examples\\n \\n \\n Challenges\\n \\n Citation\\n \\n References\\n\\nResources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\",\n", + " \"name\": \"blog_post_retriever\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_call_id\": \"call_cWnDZq2aloVtMB4KjZlTxHmZ\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-AB7yGASxz0Z0g2jiCxwx4gYHYJTi4\",\n", + " \"content\": \"According to the blog post, there are several common methods of task decomposition:\\n\\n1. **Simple Prompting by LLMs**: This involves straightforward instructions to decompose a task. Examples include:\\n - \\\"Steps for XYZ. 1.\\\"\\n - \\\"What are the subgoals for achieving XYZ?\\\"\\n - Task-specific instructions like \\\"Write a story outline\\\" for writing a novel.\\n\\n2. **Human Inputs**: Decomposition can be guided by human insights and instructions.\\n\\n3. **Chain of Thought (CoT)**: This technique prompts a model to think step-by-step, enabling it to break down complex tasks into smaller, more manageable tasks. CoT has become a standard method to enhance model performance on intricate tasks.\\n\\n4. 
**Tree of Thoughts (ToT)**: An extension of CoT, this approach decomposes the problem into multiple thought steps and generates several thoughts per step, forming a tree structure. The search process can be performed using Breadth-First Search (BFS) or Depth-First Search (DFS), with each state evaluated by a classifier or through a majority vote.\\n\\n5. **LLM+P (Large Language Model plus Planner)**: This method integrates LLMs with an external classical planner. It involves:\\n - Translating the problem into “Problem PDDL” (Planning Domain Definition Language).\\n - Using an external planner to generate a PDDL plan based on an existing “Domain PDDL”.\\n - Translating the PDDL plan back into natural language.\\n \\nBy utilizing these methods, tasks can be effectively decomposed into more manageable parts, allowing for more efficient problem-solving and planning.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 334,\n", + " \"promptTokens\": 1746,\n", + " \"totalTokens\": 2080\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_52a7f40b0b\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 1746,\n", + " \"output_tokens\": 334,\n", + " \"total_tokens\": 2080\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] + } + ], + "source": [ + "const query3 = \"What according to the blog post are common ways of doing it? redo the search\"\n", "\n", - "const secondQuestion = \"What are common ways of doing it?\";\n", - "await ragChain.invoke({ question: secondQuestion, chat_history });" + "for await (const s of await agentExecutor2.stream({ messages: [{ role: \"user\", content: query3 }] }, config3)) {\n", + " console.log(s)\n", + " console.log(\"----\")\n", + "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "See the first [LangSmith trace here](https://smith.langchain.com/public/527981c6-5018-4b68-a11a-ebcde77843e7/r) and the [second trace here](https://smith.langchain.com/public/7b97994a-ab9f-4bf3-a2e4-abb609e5610a/r)" + "Note that the agent was able to infer that \"it\" in our query refers to \"task decomposition\", and generated a reasonable search query as a result-- in this case, \"common ways of task decomposition\"." 
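If you want to capture those rewritten queries programmatically rather than reading them out of the raw stream logs, one rough approach is to collect the `tool_calls` from each streamed AI message. The sketch below assumes the chunks keep the `{ agent: { messages: [...] } }` shape shown in the output above, and the follow-up question it sends is purely illustrative.

```typescript
// Collect the retrieval queries the agent issues during a single streamed run.
const issuedQueries: string[] = [];

for await (const step of await agentExecutor2.stream(
  // Hypothetical follow-up question, reusing the same conversation thread.
  { messages: [{ role: "user", content: "How does the post describe self-reflection?" }] },
  config3
)) {
  // Chunks are keyed by node name ("agent" or "tools"), as in the logs above.
  for (const message of step.agent?.messages ?? []) {
    for (const toolCall of message.tool_calls ?? []) {
      if (toolCall.name === "blog_post_retriever") {
        issuedQueries.push(toolCall.args.query);
      }
    }
  }
}

console.log(issuedQueries);
```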
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tying it together\n", + "\n", + "For convenience, we tie together all of the necessary steps in a single code cell:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import { createRetrieverTool } from \"langchain/tools/retriever\";\n", + "import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n", + "import { MemorySaver } from \"@langchain/langgraph\";\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", + "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\"\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "const llm3 = new ChatOpenAI({ model: \"gpt-4o\" });\n", + "\n", + "const loader3 = new CheerioWebBaseLoader(\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\n", + ");\n", + "\n", + "const docs3 = await loader3.load();\n", + "\n", + "const textSplitter3 = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });\n", + "const splits3 = await textSplitter3.splitDocuments(docs3);\n", + "const vectorStore3 = await MemoryVectorStore.fromDocuments(splits3, new OpenAIEmbeddings());\n", + "\n", + "// Retrieve and generate using the relevant snippets of the blog.\n", + "const retriever3 = vectorStore3.asRetriever();\n", + "\n", + "const tool2 = createRetrieverTool(\n", + " retriever3,\n", + " {\n", + " name: \"blog_post_retriever\",\n", + " description: \"Searches and returns excerpts from the Autonomous Agents blog post.\",\n", + " }\n", + ")\n", + "const tools2 = [tool2]\n", + "const memory4 = new MemorySaver();\n", + "\n", + "const agentExecutor3 = createReactAgent({ llm: llm3, tools: tools2, checkpointSaver: memory4 })" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here we've gone over how to add application logic for incorporating historical outputs, but we're still manually updating the chat history and inserting it into each input. In a real Q&A application we'll want some way of persisting chat history and some way of automatically inserting and updating it.\n", + "## Next steps\n", + "\n", + "We've covered the steps to build a basic conversational Q&A application:\n", "\n", - "For this we can use:\n", + "- We used chains to build a predictable application that generates search queries for each user input;\n", + "- We used agents to build an application that \"decides\" when and how to generate search queries.\n", "\n", - "- [BaseChatMessageHistory](https://api.js.langchain.com/classes/langchain_core.chat_history.BaseChatMessageHistory.html): Store chat history.\n", - "- [RunnableWithMessageHistory](/docs/how_to/message_history/): Wrapper for an LCEL chain and a `BaseChatMessageHistory` that handles injecting chat history into inputs and updating it after each invocation.\n", + "To explore different types of retrievers and retrieval strategies, visit the [retrievers](/docs/how_to#retrievers) section of the how-to guides.\n", "\n", - "For a detailed walkthrough of how to use these classes together to create a stateful conversational chain, head to the [How to add message history (memory)](/docs/how_to/message_history/) LCEL page." 
+ "For a detailed walkthrough of LangChain's conversation memory abstractions, visit the [How to add message history (memory)](/docs/how_to/message_history) LCEL page.\n" ] } ], "metadata": { "kernelspec": { - "display_name": "Deno", + "display_name": "TypeScript", "language": "typescript", - "name": "deno" + "name": "tslab" }, "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, "file_extension": ".ts", - "mimetype": "text/x.typescript", + "mimetype": "text/typescript", "name": "typescript", - "nb_converter": "script", - "pygments_lexer": "typescript", - "version": "5.3.3" + "version": "3.7.2" } }, "nbformat": 4, diff --git a/docs/core_docs/docs/tutorials/chatbot.ipynb b/docs/core_docs/docs/tutorials/chatbot.ipynb index 829549e8c1c8..91490c7fa1d0 100644 --- a/docs/core_docs/docs/tutorials/chatbot.ipynb +++ b/docs/core_docs/docs/tutorials/chatbot.ipynb @@ -2,10 +2,15 @@ "cells": [ { "cell_type": "raw", - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "raw" + } + }, "source": [ "---\n", "sidebar_position: 1\n", + "keywords: [conversationchain]\n", "---" ] }, @@ -13,14 +18,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Build a Chatbot" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Overview\n", + "# Build a Chatbot\n", + "\n", "\n", ":::info Prerequisites\n", "\n", @@ -30,34 +29,57 @@ "- [Prompt Templates](/docs/concepts/#prompt-templates)\n", "- [Chat History](/docs/concepts/#chat-history)\n", "\n", + "This guide requires `langgraph >= 0.2.28`.\n", + "\n", + ":::\n", + "\n", + "\n", + "```{=mdx}\n", + "\n", + ":::note\n", + "\n", + "This tutorial previously built a chatbot using [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html). You can access this version of the tutorial in the [v0.2 docs](https://js.langchain.com/v0.2/docs/tutorials/chatbot/).\n", + "\n", + "The LangGraph implementation offers a number of advantages over `RunnableWithMessageHistory`, including the ability to persist arbitrary components of an application's state (instead of only messages).\n", + "\n", ":::\n", "\n", + "```\n", + "\n", + "## Overview\n", + "\n", "We'll go over an example of how to design and implement an LLM-powered chatbot. \n", "This chatbot will be able to have a conversation and remember previous interactions.\n", "\n", + "\n", "Note that this chatbot that we build will only use the language model to have a conversation.\n", "There are several other related concepts that you may be looking for:\n", "\n", "- [Conversational RAG](/docs/tutorials/qa_chat_history): Enable a chatbot experience over an external source of data\n", - "- [Agents](https://langchain-ai.github.io/langgraphjs/tutorials/quickstart/): Build a chatbot that can take actions\n", + "- [Agents](https://langchain-ai.github.io/langgraphjs/tutorials/multi_agent/agent_supervisor/): Build a chatbot that can take actions\n", "\n", "This tutorial will cover the basics which will be helpful for those two more advanced topics, but feel free to skip directly to there should you choose.\n", "\n", "## Setup\n", "\n", + "### Jupyter Notebook\n", + "\n", + "This guide (and most of the other guides in the documentation) uses [Jupyter notebooks](https://jupyter.org/) and assumes the reader is as well. 
Jupyter notebooks are perfect for learning how to work with LLM systems because oftentimes things can go wrong (unexpected output, API down, etc) and going through guides in an interactive environment is a great way to better understand them.\n", + "\n", + "This and other tutorials are perhaps most conveniently run in a Jupyter notebook. See [here](https://jupyter.org/install) for instructions on how to install.\n", + "\n", "### Installation\n", "\n", - "To install LangChain run:\n", + "For this tutorial we will need `@langchain/core` and `langgraph`:\n", "\n", "```{=mdx}\n", "import Npm2Yarn from \"@theme/Npm2Yarn\"\n", "\n", "\n", - " langchain @langchain/core\n", + " @langchain/core @langchain/langgraph uuid\n", "\n", "```\n", "\n", - "\n", "For more details, see our [Installation guide](/docs/how_to/installation).\n", "\n", "### LangSmith\n", @@ -68,35 +90,25 @@ "\n", "After you sign up at the link above, make sure to set your environment variables to start logging traces:\n", "\n", - "```shell\n", - "export LANGCHAIN_TRACING_V2=\"true\"\n", - "export LANGCHAIN_API_KEY=\"...\"\n", - "\n", - "# Reduce tracing latency if you are not in a serverless environment\n", - "# export LANGCHAIN_CALLBACKS_BACKGROUND=true\n", + "```typescript\n", + "process.env.LANGCHAIN_TRACING_V2 = \"true\"\n", + "process.env.LANGCHAIN_API_KEY = \"...\"\n", "```\n", "\n", "## Quickstart\n", "\n", - "First up, let's learn how to use a language model by itself. LangChain supports many different language models that you can use interchangably - select the one you want to use below!\n", + "First up, let's learn how to use a language model by itself. LangChain supports many different language models that you can use interchangeably - select the one you want to use below!\n", "\n", "```{=mdx}\n", "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", "\n", - "\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's first use the model directly. `ChatModel`s are instances of LangChain \"Runnables\", which means they expose a standard interface for interacting with them. To just simply call the model, we can pass in a list of messages to the `.invoke` method." + "\n", + "```\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -104,51 +116,51 @@ "\n", "import { ChatOpenAI } from \"@langchain/openai\";\n", "\n", - "const model = new ChatOpenAI({\n", - " model: \"gpt-4o-mini\",\n", - " temperature: 0,\n", - "});" + "const llm = new ChatOpenAI({ model: \"gpt-4o-mini\" })" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's first use the model directly. `ChatModel`s are instances of LangChain \"Runnables\", which means they expose a standard interface for interacting with them. To just simply call the model, we can pass in a list of messages to the `.invoke` method." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 28, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " \"id\": \"chatcmpl-A64of8iD4GIFNSYlOaFHxPdCeyl9E\",\n", - " \"content\": \"Hi Bob! 
How can I assist you today?\",\n", - " \"additional_kwargs\": {},\n", - " \"response_metadata\": {\n", - " \"tokenUsage\": {\n", - " \"completionTokens\": 10,\n", - " \"promptTokens\": 11,\n", - " \"totalTokens\": 21\n", - " },\n", - " \"finish_reason\": \"stop\"\n", - " },\n", - " \"tool_calls\": [],\n", - " \"invalid_tool_calls\": [],\n", - " \"usage_metadata\": {\n", - " \"input_tokens\": 11,\n", - " \"output_tokens\": 10,\n", - " \"total_tokens\": 21\n", - " }\n", - "}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXeSO4JQpxO96lj7iudUptJ6nfW\",\n", + " \"content\": \"Hi Bob! How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 10,\n", + " \"promptTokens\": 10,\n", + " \"totalTokens\": 20\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 10,\n", + " \"output_tokens\": 10,\n", + " \"total_tokens\": 20\n", + " }\n", + "}\n" + ] } ], "source": [ - "import { HumanMessage } from \"@langchain/core/messages\";\n", - "\n", - "await model.invoke([new HumanMessage({ content: \"Hi! I'm Bob\" })]);" + "await llm.invoke([{ role: \"user\", content: \"Hi im bob\" }])" ] }, { @@ -160,48 +172,46 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 29, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " \"id\": \"chatcmpl-A64ogC7owxmPla3ggZERNCFZpVHSp\",\n", - " \"content\": \"I'm sorry, but I don't have access to personal information about users unless it has been shared with me in the course of our conversation. If you'd like to tell me your name, feel free!\",\n", - " \"additional_kwargs\": {},\n", - " \"response_metadata\": {\n", - " \"tokenUsage\": {\n", - " \"completionTokens\": 39,\n", - " \"promptTokens\": 11,\n", - " \"totalTokens\": 50\n", - " },\n", - " \"finish_reason\": \"stop\"\n", - " },\n", - " \"tool_calls\": [],\n", - " \"invalid_tool_calls\": [],\n", - " \"usage_metadata\": {\n", - " \"input_tokens\": 11,\n", - " \"output_tokens\": 39,\n", - " \"total_tokens\": 50\n", - " }\n", - "}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXe1Zih4gMe3XgotWL83xeWub2h\",\n", + " \"content\": \"I'm sorry, but I don't have access to personal information about individuals unless it has been shared with me during our conversation. 
If you'd like to tell me your name, feel free to do so!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 39,\n", + " \"promptTokens\": 10,\n", + " \"totalTokens\": 49\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 10,\n", + " \"output_tokens\": 39,\n", + " \"total_tokens\": 49\n", + " }\n", + "}\n" + ] } ], "source": [ - "await model.invoke([new HumanMessage({ content: \"What's my name?\" })])" + "await llm.invoke([{ role: \"user\", content: \"Whats my name\" }])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's take a look at the example [LangSmith trace](https://smith.langchain.com/public/e5a0ae1b-32b9-4beb-836d-38f40bfa6762/r)\n", + "Let's take a look at the example [LangSmith trace](https://smith.langchain.com/public/3b768e44-a319-453a-bd6e-30f9df75f16a/r)\n", "\n", "We can see that it doesn't take the previous conversation turn into context, and cannot answer the question.\n", "This makes for a terrible chatbot experience!\n", @@ -211,49 +221,43 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 30, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "AIMessage {\n", - " \"id\": \"chatcmpl-A64ohhg3P4BuIiw8mUCLI3zYHNOvS\",\n", - " \"content\": \"Your name is Bob! How can I help you today, Bob?\",\n", - " \"additional_kwargs\": {},\n", - " \"response_metadata\": {\n", - " \"tokenUsage\": {\n", - " \"completionTokens\": 14,\n", - " \"promptTokens\": 33,\n", - " \"totalTokens\": 47\n", - " },\n", - " \"finish_reason\": \"stop\"\n", - " },\n", - " \"tool_calls\": [],\n", - " \"invalid_tool_calls\": [],\n", - " \"usage_metadata\": {\n", - " \"input_tokens\": 33,\n", - " \"output_tokens\": 14,\n", - " \"total_tokens\": 47\n", - " }\n", - "}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXfX4Fnp247rOxyPlBUYMQgahj2\",\n", + " \"content\": \"Your name is Bob! How can I help you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 12,\n", + " \"promptTokens\": 33,\n", + " \"totalTokens\": 45\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 33,\n", + " \"output_tokens\": 12,\n", + " \"total_tokens\": 45\n", + " }\n", + "}\n" + ] } ], "source": [ - "import { AIMessage } from \"@langchain/core/messages\"\n", - "\n", - "await model.invoke(\n", - " [\n", - " new HumanMessage({ content: \"Hi! I'm Bob\" }),\n", - " new AIMessage({ content: \"Hello Bob! How can I assist you today?\" }),\n", - " new HumanMessage({ content: \"What's my name?\" }),\n", - " ]\n", - ");" + "await llm.invoke([\n", + " { role: \"user\", content: \"Hi! I'm Bob\" },\n", + " { role: \"assistant\", content: \"Hello Bob! 
How can I assist you today?\" },\n", + " { role: \"user\", content: \"What's my name?\" }\n", + "]);" ] }, { @@ -270,153 +274,208 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Message History\n", + "## Message persistence\n", + "\n", + "[LangGraph](https://langchain-ai.github.io/langgraphjs/) implements a built-in persistence layer, making it ideal for chat applications that support multiple conversational turns.\n", + "\n", + "Wrapping our chat model in a minimal LangGraph application allows us to automatically persist the message history, simplifying the development of multi-turn applications.\n", "\n", - "We can use a Message History class to wrap our model and make it stateful.\n", - "This will keep track of inputs and outputs of the model, and store them in some datastore.\n", - "Future interactions will then load those messages and pass them into the chain as part of the input.\n", - "Let's see how to use this!" + "LangGraph comes with a simple in-memory checkpointer, which we use below." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 31, "metadata": {}, + "outputs": [], "source": [ - "We import the relevant classes and set up our chain which wraps the model and adds in this message history. A key part here is the function we pass into as the `getSessionHistory()`. This function is expected to take in a `sessionId` and return a Message History object. This `sessionId` is used to distinguish between separate conversations, and should be passed in as part of the config when calling the new chain.\n", + "import { START, END, MessagesAnnotation, StateGraph, MemorySaver } from \"@langchain/langgraph\";\n", "\n", - "Let's also create a simple chain by adding a prompt to help with formatting:" + "// Define the function that calls the model\n", + "const callModel = async (state: typeof MessagesAnnotation.State) => {\n", + " const response = await llm.invoke(state.messages);\n", + " return { messages: response };\n", + "};\n", + "\n", + "// Define a new graph\n", + "const workflow = new StateGraph(MessagesAnnotation)\n", + " // Define the node and edge\n", + " .addNode(\"model\", callModel)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "// Add memory\n", + "const memory = new MemorySaver();\n", + "const app = workflow.compile({ checkpointer: memory });" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now need to create a `config` that we pass into the runnable every time. This config contains information that is not part of the input directly, but is still useful. In this case, we want to include a `thread_id`. 
This should look like:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ - "// We use an ephemeral, in-memory chat history for this demo.\n", - "import { InMemoryChatMessageHistory } from \"@langchain/core/chat_history\";\n", - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "import { RunnableWithMessageHistory } from \"@langchain/core/runnables\";\n", + "import { v4 as uuidv4 } from \"uuid\";\n", "\n", - "const messageHistories: Record = {};\n", - "\n", - "const prompt = ChatPromptTemplate.fromMessages([\n", - " [\"system\", `You are a helpful assistant who remembers all details the user shares with you.`],\n", - " [\"placeholder\", \"{chat_history}\"],\n", - " [\"human\", \"{input}\"],\n", - "]);\n", - "\n", - "const chain = prompt.pipe(model);\n", - "\n", - "const withMessageHistory = new RunnableWithMessageHistory({\n", - " runnable: chain,\n", - " getMessageHistory: async (sessionId) => {\n", - " if (messageHistories[sessionId] === undefined) {\n", - " messageHistories[sessionId] = new InMemoryChatMessageHistory();\n", - " }\n", - " return messageHistories[sessionId];\n", - " },\n", - " inputMessagesKey: \"input\",\n", - " historyMessagesKey: \"chat_history\",\n", - "});" + "const config = { configurable: { thread_id: uuidv4() } };" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We now need to create a `config` that we pass into the runnable every time. This config contains information that is not part of the input directly, but is still useful. In this case, we want to include a `sessionId`. This should look like:" + "This enables us to support multiple conversation threads with a single application, a common requirement when your application has multiple users.\n", + "\n", + "We can then invoke the application:" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 33, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"Hi Bob! How can I assist you today?\"\u001b[39m" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXfjqCno78CGXCHoAgamqXG1pnZ\",\n", + " \"content\": \"Hi Bob! How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 10,\n", + " \"promptTokens\": 12,\n", + " \"totalTokens\": 22\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 12,\n", + " \"output_tokens\": 10,\n", + " \"total_tokens\": 22\n", + " }\n", + "}\n" + ] } ], "source": [ - "const config = {\n", - " configurable: {\n", - " sessionId: \"abc2\"\n", + "const input = [\n", + " {\n", + " role: \"user\",\n", + " content: \"Hi! I'm Bob.\",\n", " }\n", - "};\n", - "\n", - "const response = await withMessageHistory.invoke({\n", - " input: \"Hi! 
I'm Bob\",\n", - "}, config);\n", - "\n", - "response.content;" + "]\n", + "const output = await app.invoke({ messages: input }, config)\n", + "// The output contains all messages in the state.\n", + "// This will log the last message in the conversation.\n", + "console.log(output.messages[output.messages.length - 1]);" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 34, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"Your name is Bob. How can I help you today?\"\u001b[39m" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXgzHFHk4KsaNmDJyvflHq4JY2L\",\n", + " \"content\": \"Your name is Bob! How can I help you today, Bob?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 14,\n", + " \"promptTokens\": 34,\n", + " \"totalTokens\": 48\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 34,\n", + " \"output_tokens\": 14,\n", + " \"total_tokens\": 48\n", + " }\n", + "}\n" + ] } ], "source": [ - "const followupResponse = await withMessageHistory.invoke({\n", - " input: \"What's my name?\",\n", - "}, config);\n", - "\n", - "followupResponse.content" + "const input2 = [\n", + " {\n", + " role: \"user\",\n", + " content: \"What's my name?\",\n", + " }\n", + "]\n", + "const output2 = await app.invoke({ messages: input2 }, config)\n", + "console.log(output2.messages[output2.messages.length - 1]);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Great! Our chatbot now remembers things about us. If we change the config to reference a different `sessionId`, we can see that it starts the conversation fresh." + "Great! Our chatbot now remembers things about us. If we change the config to reference a different `thread_id`, we can see that it starts the conversation fresh." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 35, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"I'm sorry, but I don't have your name. If you tell me, I'll remember it for our future conversations\"\u001b[39m... 1 more character" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXhT4EVx8mGgmKXJ1s132qEluxR\",\n", + " \"content\": \"I'm sorry, but I don’t have access to personal data about individuals unless it has been shared in the course of our conversation. Therefore, I don't know your name. 
How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 41,\n", + " \"promptTokens\": 11,\n", + " \"totalTokens\": 52\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 11,\n", + " \"output_tokens\": 41,\n", + " \"total_tokens\": 52\n", + " }\n", + "}\n" + ] } ], "source": [ - "const config2 = {\n", - " configurable: {\n", - " sessionId: \"abc3\"\n", + "const config2 = { configurable: { thread_id: uuidv4() } }\n", + "const input3 = [\n", + " {\n", + " role: \"user\",\n", + " content: \"What's my name?\",\n", + " }\n", " }\n", - "};\n", - "\n", - "const response2 = await withMessageHistory.invoke({\n", - " input: \"What's my name?\",\n", - "}, config2);\n", - "\n", - "response2.content" + "]\n", + "const output3 = await app.invoke({ messages: input3 }, config2)\n", + "console.log(output3.messages[output3.messages.length - 1]);" ] }, { @@ -428,338 +487,623 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 36, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"Your name is Bob. What would you like to talk about?\"\u001b[39m" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXhZmtzvV3kqKig47xxhKEnvVfH\",\n", + " \"content\": \"Your name is Bob! If there's anything else you'd like to talk about or ask, feel free!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 20,\n", + " \"promptTokens\": 60,\n", + " \"totalTokens\": 80\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 60,\n", + " \"output_tokens\": 20,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n" + ] } ], "source": [ - "const config3 = {\n", - " configurable: {\n", - " sessionId: \"abc2\"\n", - " }\n", - "};\n", + "const output4 = await app.invoke({ messages: input2 }, config)\n", + "console.log(output4.messages[output4.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is how we can support a chatbot having conversations with many users!\n", + "\n", + "Right now, all we've done is add a simple persistence layer around the model. We can start to make this more complicated and personalized by adding in a prompt template.\n", "\n", - "const response3 = await withMessageHistory.invoke({\n", - " input: \"What's my name?\",\n", - "}, config3);\n", + "## Prompt templates\n", "\n", - "response3.content" + "Prompt Templates help to turn raw user information into a format that the LLM can work with. In this case, the raw user input is just a message, which we are passing to the LLM. Let's now make that a bit more complicated. First, let's add in a system message with some custom instructions (but still taking messages as input). Next, we'll add in more input besides just the messages.\n", + "\n", + "To add in a system message, we will create a `ChatPromptTemplate`. We will utilize `MessagesPlaceholder` to pass all the messages in."
] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 37, "metadata": {}, + "outputs": [], "source": [ - "This is how we can support a chatbot having conversations with many users!" + "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromMessages([\n", + " [\"system\", \"You talk like a pirate. Answer all questions to the best of your ability.\"],\n", + " new MessagesPlaceholder(\"messages\"),\n", + "]);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Managing Conversation History\n", - "\n", - "One important concept to understand when building chatbots is how to manage conversation history. If left unmanaged, the list of messages will grow unbounded and potentially overflow the context window of the LLM. Therefore, it is important to add a step that limits the size of the messages you are passing in.\n", - "\n", - "**Importantly, you will want to do this BEFORE the prompt template but AFTER you load previous messages from Message History.**\n", - "\n", - "We can do this by adding a simple step in front of the prompt that modifies the `chat_history` key appropriately, and then wrap that new chain in the Message History class. First, let's define a function that will modify the messages passed in. Let's make it so that it selects the 10 most recent messages. We can then create a new chain by adding that at the start." + "We can now update our application to incorporate this template:" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ - "import type { BaseMessage } from \"@langchain/core/messages\";\n", - "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", - "\n", - "type ChainInput = {\n", - " chat_history: BaseMessage[];\n", - " input: string;\n", - "}\n", + "import { START, END, MessagesAnnotation, StateGraph, MemorySaver } from \"@langchain/langgraph\";\n", + "\n", + "// Define the function that calls the model\n", + "const callModel2 = async (state: typeof MessagesAnnotation.State) => {\n", + " // highlight-start\n", + " const chain = prompt.pipe(llm);\n", + " const response = await chain.invoke(state);\n", + " // highlight-end\n", + " // Update message history with response:\n", + " return { messages: [response] };\n", + "};\n", "\n", - "const filterMessages = (input: ChainInput) => input.chat_history.slice(-10);\n", + "// Define a new graph\n", + "const workflow2 = new StateGraph(MessagesAnnotation)\n", + " // Define the (single) node in the graph\n", + " .addNode(\"model\", callModel2)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", "\n", - "const chain2 = RunnableSequence.from([\n", - " RunnablePassthrough.assign({\n", - " chat_history: filterMessages\n", - " }),\n", - " prompt,\n", - " model,\n", - "]);" + "// Add memory\n", + "const app2 = workflow2.compile({ checkpointer: new MemorySaver() });" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's now try it out! If we create a list of messages more than 10 messages long, we can see what it no longer remembers information in the early messages." 
+ "We invoke the application in the same way:" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 39, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXio2Vy1YNRDiFdKKEyN3Yw1B9I\",\n", + " \"content\": \"Ahoy, Jim! What brings ye to these treacherous waters today? Speak up, matey!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 22,\n", + " \"promptTokens\": 32,\n", + " \"totalTokens\": 54\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 32,\n", + " \"output_tokens\": 22,\n", + " \"total_tokens\": 54\n", + " }\n", + "}\n" + ] + } + ], "source": [ - "const messages = [\n", - " new HumanMessage({ content: \"hi! I'm bob\" }),\n", - " new AIMessage({ content: \"hi!\" }),\n", - " new HumanMessage({ content: \"I like vanilla ice cream\" }),\n", - " new AIMessage({ content: \"nice\" }),\n", - " new HumanMessage({ content: \"whats 2 + 2\" }),\n", - " new AIMessage({ content: \"4\" }),\n", - " new HumanMessage({ content: \"thanks\" }),\n", - " new AIMessage({ content: \"No problem!\" }),\n", - " new HumanMessage({ content: \"having fun?\" }),\n", - " new AIMessage({ content: \"yes!\" }),\n", - " new HumanMessage({ content: \"That's great!\" }),\n", - " new AIMessage({ content: \"yes it is!\" }),\n", - "];" + "const config3 = { configurable: { thread_id: uuidv4() } }\n", + "const input4 = [\n", + " {\n", + " role: \"user\",\n", + " content: \"Hi! I'm Jim.\",\n", + " }\n", + "]\n", + "const output5 = await app2.invoke({ messages: input4 }, config3)\n", + "console.log(output5.messages[output5.messages.length - 1]);" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 40, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"You haven't shared your name with me yet. What is it?\"\u001b[39m" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXjZNHiT5g7eTf52auWGXDUUcDs\",\n", + " \"content\": \"Ye be callin' yerself Jim, if me memory serves me right! Arrr, what else can I do fer ye, matey?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 31,\n", + " \"promptTokens\": 67,\n", + " \"totalTokens\": 98\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_3a215618e8\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 67,\n", + " \"output_tokens\": 31,\n", + " \"total_tokens\": 98\n", + " }\n", + "}\n" + ] } ], "source": [ - "const response4 = await chain2.invoke(\n", + "const input5 = [\n", " {\n", - " chat_history: messages,\n", - " input: \"what's my name?\"\n", + " role: \"user\",\n", + " content: \"What is my name?\"\n", " }\n", - ")\n", - "response4.content" + "]\n", + "const output6 = await app2.invoke({ messages: input5 }, config3)\n", + "console.log(output6.messages[output6.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Awesome! Let's now make our prompt a little bit more complicated. 
Let's assume that the prompt template now looks something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "const prompt2 = ChatPromptTemplate.fromMessages([\n", + " [\"system\", \"You are a helpful assistant. Answer all questions to the best of your ability in {language}.\"],\n", + " new MessagesPlaceholder(\"messages\"),\n", + "]);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "But if we ask about information that is within the last ten messages, it still remembers it" + "Note that we have added a new `language` input to the prompt. Our application now has two parameters-- the input `messages` and `language`. We should update our application's state to reflect this:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "import { START, END, StateGraph, MemorySaver, MessagesAnnotation, Annotation } from \"@langchain/langgraph\";\n", + "\n", + "// Define the State\n", + "const GraphAnnotation = Annotation.Root({\n", + " ...MessagesAnnotation.spec,\n", + " language: Annotation(),\n", + "});\n", + "\n", + "// Define the function that calls the model\n", + "const callModel3 = async (state: typeof GraphAnnotation.State) => {\n", + " const chain = prompt2.pipe(llm);\n", + " const response = await chain.invoke(state);\n", + " return { messages: [response] };\n", + "};\n", + "\n", + "const workflow3 = new StateGraph(GraphAnnotation)\n", + " .addNode(\"model\", callModel3)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "const app3 = workflow3.compile({ checkpointer: new MemorySaver() });" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 43, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"Your favorite ice cream is vanilla!\"\u001b[39m" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXkq2ZV9xmOBSM2iJbYSn8Epvqa\",\n", + " \"content\": \"¡Hola, Bob! ¿En qué puedo ayudarte hoy?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 12,\n", + " \"promptTokens\": 32,\n", + " \"totalTokens\": 44\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 32,\n", + " \"output_tokens\": 12,\n", + " \"total_tokens\": 44\n", + " }\n", + "}\n" + ] } ], "source": [ - "const response5 = await chain2.invoke(\n", - " {\n", - " chat_history: messages,\n", - " input: \"what's my fav ice cream\"\n", - " }\n", - ")\n", - "response5.content" + "const config4 = { configurable: { thread_id: uuidv4() } }\n", + "const input6 = {\n", + " messages: [\n", + " {\n", + " role: \"user\",\n", + " content: \"Hi im bob\"\n", + " }\n", + " ],\n", + " language: \"Spanish\"\n", + "}\n", + "const output7 = await app3.invoke(input6, config4)\n", + "console.log(output7.messages[output7.messages.length - 1]);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's now wrap this chain in a `RunnableWithMessageHistory` constructor. 
For demo purposes, we will also slightly modify our `getMessageHistory()` method to always start new sessions with the previously declared list of 10 messages to simulate several conversation turns:" + "Note that the entire state is persisted, so we can omit parameters like `language` if no changes are desired:" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 44, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"You haven't shared your name with me yet. What is it?\"\u001b[39m" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUXk9Ccr1dhmA9lZ1VmZ998PFyJF\",\n", + " \"content\": \"Tu nombre es Bob. ¿Hay algo más en lo que te pueda ayudar?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 16,\n", + " \"promptTokens\": 57,\n", + " \"totalTokens\": 73\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 57,\n", + " \"output_tokens\": 16,\n", + " \"total_tokens\": 73\n", + " }\n", + "}\n" + ] } ], "source": [ - "const messageHistories2: Record = {};\n", - "\n", - "const withMessageHistory2 = new RunnableWithMessageHistory({\n", - " runnable: chain2,\n", - " getMessageHistory: async (sessionId) => {\n", - " if (messageHistories2[sessionId] === undefined) {\n", - " const messageHistory = new InMemoryChatMessageHistory();\n", - " await messageHistory.addMessages(messages);\n", - " messageHistories2[sessionId] = messageHistory;\n", + "const input7 = {\n", + " messages: [\n", + " {\n", + " role: \"user\",\n", + " content: \"What is my name?\"\n", " }\n", - " return messageHistories2[sessionId];\n", - " },\n", - " inputMessagesKey: \"input\",\n", - " historyMessagesKey: \"chat_history\",\n", - "})\n", - "\n", - "const config4 = {\n", - " configurable: {\n", - " sessionId: \"abc4\"\n", - " }\n", - "};\n", - "\n", - "const response7 = await withMessageHistory2.invoke(\n", - " {\n", - " input: \"whats my name?\",\n", - " chat_history: [],\n", - " },\n", - " config4,\n", - ")\n", - "\n", - "response7.content" + " ],\n", + "}\n", + "const output8 = await app3.invoke(input7, config4)\n", + "console.log(output8.messages[output8.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To help you understand what's happening internally, check out [this LangSmith trace](https://smith.langchain.com/public/d61630b7-6a52-4dc9-974c-8452008c498a/r)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "There's now two new messages in the chat history. This means that even more information that used to be accessible in our conversation history is no longer available!" + "## Managing Conversation History\n", + "\n", + "One important concept to understand when building chatbots is how to manage conversation history. If left unmanaged, the list of messages will grow unbounded and potentially overflow the context window of the LLM. 
Therefore, it is important to add a step that limits the size of the messages you are passing in.\n", + "\n", + "**Importantly, you will want to do this BEFORE the prompt template but AFTER you load previous messages from Message History.**\n", + "\n", + "We can do this by adding a simple step in front of the prompt that modifies the `messages` key appropriately, and then wrap that new chain in the Message History class. \n", + "\n", + "LangChain comes with a few built-in helpers for [managing a list of messages](/docs/how_to/#messages). In this case we'll use the [trimMessages](/docs/how_to/trim_messages/) helper to reduce how many messages we're sending to the model. The trimmer allows us to specify how many tokens we want to keep, along with other parameters like if we want to always keep the system message and whether to allow partial messages:" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 54, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[32m\"You haven't mentioned your favorite ice cream yet. What is it?\"\u001b[39m" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " SystemMessage {\n", + " \"content\": \"you're a good assistant\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " HumanMessage {\n", + " \"content\": \"I like vanilla ice cream\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"content\": \"nice\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"content\": \"whats 2 + 2\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"content\": \"4\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"content\": \"thanks\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"content\": \"no problem!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " },\n", + " HumanMessage {\n", + " \"content\": \"having fun?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + " },\n", + " AIMessage {\n", + " \"content\": \"yes!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + " }\n", + "]\n" + ] } ], "source": [ - "const response8 = await withMessageHistory2.invoke({\n", - " input: \"whats my favorite ice cream?\",\n", - " chat_history: [],\n", - "}, config4);\n", + "import { SystemMessage, HumanMessage, AIMessage, trimMessages } from \"@langchain/core/messages\"\n", + "\n", + "const trimmer = trimMessages({\n", + " maxTokens: 10,\n", + " strategy: \"last\",\n", + " tokenCounter: (msgs) => msgs.length,\n", + " includeSystem: true,\n", + " allowPartial: false,\n", + " startOn: \"human\",\n", + "})\n", "\n", - "response8.content" + "const messages = [\n", + " new SystemMessage(\"you're a good assistant\"),\n", + " new HumanMessage(\"hi! 
I'm bob\"),\n", + " new AIMessage(\"hi!\"),\n", + " new HumanMessage(\"I like vanilla ice cream\"),\n", + " new AIMessage(\"nice\"),\n", + " new HumanMessage(\"whats 2 + 2\"),\n", + " new AIMessage(\"4\"),\n", + " new HumanMessage(\"thanks\"),\n", + " new AIMessage(\"no problem!\"),\n", + " new HumanMessage(\"having fun?\"),\n", + " new AIMessage(\"yes!\"),\n", + "]\n", + "\n", + "await trimmer.invoke(messages)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "If you take a look at LangSmith, you can see exactly what is happening under the hood in the [LangSmith trace](https://smith.langchain.com/public/ebc2e1e7-0703-43f7-a476-8cb8cbd7f61a/r). Navigate to the chat model call to see exactly which messages are getting filtered out." + "To use it in our chain, we just need to run the trimmer before we pass the `messages` input to our prompt. " ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 55, "metadata": {}, + "outputs": [], "source": [ - "## Streaming\n", + "const callModel4 = async (state: typeof GraphAnnotation.State) => {\n", + " const chain = prompt2.pipe(llm);\n", + " // highlight-start\n", + " const trimmedMessage = await trimmer.invoke(state.messages);\n", + " const response = await chain.invoke({ messages: trimmedMessage, language: state.language });\n", + " // highlight-end\n", + " return { messages: [response] };\n", + "};\n", "\n", - "Now we've got a functional chatbot. However, one *really* important UX consideration for chatbot application is streaming. LLMs can sometimes take a while to respond, and so in order to improve the user experience one thing that most application do is stream back each token as it is generated. This allows the user to see progress.\n", "\n", - "It's actually super easy to do this!\n", + "const workflow4 = new StateGraph(GraphAnnotation)\n", + " .addNode(\"model\", callModel4)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", "\n", - "All chains expose a `.stream()` method, and ones that use message history are no different. We can simply use that method to get back a streaming response." 
+ "const app4 = workflow4.compile({ checkpointer: new MemorySaver() });" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now if we try asking the model our name, it won't know it since we trimmed that part of the chat history:" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "| \n", - "| Hi\n", - "| Todd\n", - "| !\n", - "| Here\n", - "| ’s\n", - "| a\n", - "| joke\n", - "| for\n", - "| you\n", - "| :\n", - "| \n", - "\n", - "\n", - "| Why\n", - "| did\n", - "| the\n", - "| scare\n", - "| crow\n", - "| win\n", - "| an\n", - "| award\n", - "| ?\n", - "| \n", - "\n", - "\n", - "| Because\n", - "| he\n", - "| was\n", - "| outstanding\n", - "| in\n", - "| his\n", - "| field\n", - "| !\n", - "| \n" + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUdCOvzRAvgoxd2sf93oGKQfA9vh\",\n", + " \"content\": \"I don’t know your name, but I’d be happy to learn it if you’d like to share!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 22,\n", + " \"promptTokens\": 97,\n", + " \"totalTokens\": 119\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 97,\n", + " \"output_tokens\": 22,\n", + " \"total_tokens\": 119\n", + " }\n", + "}\n" ] } ], "source": [ - "const config5 = {\n", - " configurable: {\n", - " sessionId: \"abc6\"\n", - " }\n", - "};\n", + "const config5 = { configurable: { thread_id: uuidv4() }}\n", + "const input8 = {\n", + " // highlight-next-line\n", + " messages: [...messages, new HumanMessage(\"What is my name?\")],\n", + " language: \"English\"\n", + "}\n", "\n", - "const stream = await withMessageHistory2.stream({\n", - " input: \"hi! I'm todd. 
tell me a joke\",\n", - " chat_history: [],\n", - "}, config5);\n", + "const output9 = await app4.invoke(\n", + " input8,\n", + " config5,\n", + ")\n", + "console.log(output9.messages[output9.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But if we ask about information that is within the last few messages, it remembers:" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-ABUdChq5JOMhcFA1dB7PvCHLyliwM\",\n", + " \"content\": \"You asked for the solution to the math problem \\\"what's 2 + 2,\\\" and I answered that it equals 4.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 27,\n", + " \"promptTokens\": 99,\n", + " \"totalTokens\": 126\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_1bb46167f9\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 99,\n", + " \"output_tokens\": 27,\n", + " \"total_tokens\": 126\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "const config6 = { configurable: { thread_id: uuidv4() }}\n", + "const input9 = {\n", + " // highlight-next-line\n", + " messages: [...messages, new HumanMessage(\"What math problem did I ask?\")],\n", + " language: \"English\"\n", + "}\n", "\n", - "for await (const chunk of stream) {\n", - " console.log(\"|\", chunk.content);\n", - "}" + "const output10 = await app4.invoke(\n", + " input9,\n", + " config6,\n", + ")\n", + "console.log(output10.messages[output10.messages.length - 1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you take a look at LangSmith, you can see exactly what is happening under the hood in the [LangSmith trace](https://smith.langchain.com/public/bf1b1a10-0fe0-42f6-9f0f-b70d9f7520dc/r)." 
] }, { @@ -771,12 +1115,14 @@ "Now that you understand the basics of how to create a chatbot in LangChain, some more advanced tutorials you may be interested in are:\n", "\n", "- [Conversational RAG](/docs/tutorials/qa_chat_history): Enable a chatbot experience over an external source of data\n", - "- [Agents](https://langchain-ai.github.io/langgraphjs/tutorials/quickstart/): Build a chatbot that can take actions\n", + "- [Agents](https://langchain-ai.github.io/langgraphjs/tutorials/multi_agent/agent_supervisor/): Build a chatbot that can take actions\n", "\n", "If you want to dive deeper on specifics, some things worth checking out are:\n", "\n", "- [Streaming](/docs/how_to/streaming): streaming is *crucial* for chat applications\n", - "- [How to add message history](/docs/how_to/message_history): for a deeper dive into all things related to message history" + "- [How to add message history](/docs/how_to/message_history): for a deeper dive into all things related to message history\n", + "- [How to manage large message history](/docs/how_to/trim_messages/): more techniques for managing a large chat history\n", + "- [LangGraph main docs](https://langchain-ai.github.io/langgraph/): for more detail on building with LangGraph" ] } ], @@ -787,12 +1133,15 @@ "name": "deno" }, "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, "file_extension": ".ts", - "mimetype": "text/x.typescript", + "mimetype": "text/typescript", "name": "typescript", - "nb_converter": "script", - "pygments_lexer": "typescript", - "version": "5.3.3" + "version": "3.7.2" } }, "nbformat": 4, diff --git a/docs/core_docs/docs/tutorials/qa_chat_history.ipynb b/docs/core_docs/docs/tutorials/qa_chat_history.ipynb index b6bd8146c71a..b1694c4413ab 100644 --- a/docs/core_docs/docs/tutorials/qa_chat_history.ipynb +++ b/docs/core_docs/docs/tutorials/qa_chat_history.ipynb @@ -1,390 +1,1438 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Conversational RAG\n", - "\n", - ":::info Prerequisites\n", - "\n", - "This guide assumes familiarity with the following concepts:\n", - "\n", - "- [Chat history](/docs/concepts/#chat-history)\n", - "- [Chat models](/docs/concepts/#chat-models)\n", - "- [Embeddings](/docs/concepts/#embedding-models)\n", - "- [Vector stores](/docs/concepts/#vector-stores)\n", - "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", - "- [Tools](/docs/concepts/#tools)\n", - "- [Agents](/docs/concepts/#agents)\n", - "\n", - ":::\n", - "\n", - "In many Q&A applications we want to allow the user to have a back-and-forth conversation, meaning the application needs some sort of \"memory\" of past questions and answers, and some logic for incorporating those into its current thinking.\n", - "\n", - "In this guide we focus on **adding logic for incorporating historical messages.** Further details on chat history management is [covered here](/docs/how_to/message_history).\n", - "\n", - "We will cover two approaches:\n", - "\n", - "1. Chains, in which we always execute a retrieval step;\n", - "2. Agents, in which we give an LLM discretion over whether and how to execute a retrieval step (or multiple steps).\n", - "\n", - "For the external knowledge source, we will use the same [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng from the [RAG tutorial](/docs/tutorials/rag)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "### Dependencies\n", - "\n", - "We’ll use an OpenAI chat model and embeddings and a Memory vector store in this walkthrough, but everything shown here works with any [ChatModel](/docs/concepts/#chat-models) or [LLM](/docs/concepts#llms), [Embeddings](/docs/concepts#embedding-models), and [VectorStore](/docs/concepts#vectorstores) or [Retriever](/docs/concepts#retrievers).\n", - "\n", - "We’ll use the following packages:\n", - "\n", - "```bash\n", - "npm install --save langchain @langchain/openai cheerio\n", - "```\n", - "\n", - "We need to set environment variable `OPENAI_API_KEY`:\n", - "\n", - "```bash\n", - "export OPENAI_API_KEY=YOUR_KEY\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### LangSmith\n", - "\n", - "Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with [LangSmith](https://smith.langchain.com/).\n", - "\n", - "Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\n", - "\n", - "\n", - "```bash\n", - "export LANGCHAIN_TRACING_V2=true\n", - "export LANGCHAIN_API_KEY=YOUR_KEY\n", - "\n", - "# Reduce tracing latency if you are not in a serverless environment\n", - "# export LANGCHAIN_CALLBACKS_BACKGROUND=true\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initial setup" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import \"cheerio\";\n", - "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", - "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", - "import { MemoryVectorStore } from \"langchain/vectorstores/memory\"\n", - "import { OpenAIEmbeddings, ChatOpenAI } from \"@langchain/openai\";\n", - "import { pull } from \"langchain/hub\";\n", - "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", - "import { RunnableSequence, RunnablePassthrough } from \"@langchain/core/runnables\";\n", - "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n", - "\n", - "import { createStuffDocumentsChain } from \"langchain/chains/combine_documents\";\n", - "\n", - "const loader = new CheerioWebBaseLoader(\n", - " \"https://lilianweng.github.io/posts/2023-06-23-agent/\"\n", - ");\n", - "\n", - "const docs = await loader.load();\n", - "\n", - "const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });\n", - "const splits = await textSplitter.splitDocuments(docs);\n", - "const vectorStore = await MemoryVectorStore.fromDocuments(splits, new OpenAIEmbeddings());\n", - "\n", - "// Retrieve and generate using the relevant snippets of the blog.\n", - "const retriever = vectorStore.asRetriever();\n", - "const prompt = await pull(\"rlm/rag-prompt\");\n", - "const llm = new ChatOpenAI({ model: \"gpt-3.5-turbo\", temperature: 0 });\n", - "const ragChain = await createStuffDocumentsChain({\n", - " llm,\n", - " prompt,\n", - " outputParser: new StringOutputParser(),\n", - "});" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's 
see what this prompt actually looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n", - "Question: {question} \n", - "Context: {context} \n", - "Answer:\n" - ] - } - ], - "source": [ - "console.log(prompt.promptMessages.map((msg) => msg.prompt.template).join(\"\\n\"));" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\u001b[32m\"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. I\"\u001b[39m... 208 more characters" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await ragChain.invoke({\n", - " context: await retriever.invoke(\"What is Task Decomposition?\"),\n", - " question: \"What is Task Decomposition?\"\n", - "});" - ] - }, + "cells": [ + { + "cell_type": "raw", + "id": "023635f2-71cf-43f2-a2e2-a7b4ced30a74", + "metadata": {}, + "source": [ + "---\n", + "sidebar_position: 2\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "86fc5bb2-017f-434e-8cd6-53ab214a5604", + "metadata": {}, + "source": [ + "# Conversational RAG\n", + "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "\n", + "- [Chat history](/docs/concepts/#chat-history)\n", + "- [Chat models](/docs/concepts/#chat-models)\n", + "- [Embeddings](/docs/concepts/#embedding-models)\n", + "- [Vector stores](/docs/concepts/#vector-stores)\n", + "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", + "- [Tools](/docs/concepts/#tools)\n", + "- [Agents](/docs/concepts/#agents)\n", + "\n", + ":::\n", + "\n", + "In many Q&A applications we want to allow the user to have a back-and-forth conversation, meaning the application needs some sort of \"memory\" of past questions and answers, and some logic for incorporating those into its current thinking.\n", + "\n", + "In this guide we focus on **adding logic for incorporating historical messages.** Further details on chat history management is [covered here](/docs/how_to/message_history).\n", + "\n", + "We will cover two approaches:\n", + "\n", + "1. Chains, in which we always execute a retrieval step;\n", + "2. Agents, in which we give an LLM discretion over whether and how to execute a retrieval step (or multiple steps).\n", + "\n", + "For the external knowledge source, we will use the same [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng from the [RAG tutorial](/docs/tutorials/rag)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "487d8d79-5ee9-4aa4-9fdf-cd5f4303e099", + "metadata": {}, + "source": [ + "## Setup\n", + "### Dependencies\n", + "\n", + "We’ll use an OpenAI chat model and embeddings and a Memory vector store in this walkthrough, but everything shown here works with any [ChatModel](/docs/concepts/#chat-models) or [LLM](/docs/concepts#llms), [Embeddings](/docs/concepts#embedding-models), and [VectorStore](/docs/concepts#vectorstores) or [Retriever](/docs/concepts#retrievers).\n", + "\n", + "We’ll use the following packages:\n", + "\n", + "```bash\n", + "npm install --save langchain @langchain/openai cheerio\n", + "```\n", + "\n", + "We need to set the environment variable `OPENAI_API_KEY`:\n", + "\n", + "```bash\n", + "export OPENAI_API_KEY=YOUR_KEY\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "1665e740-ce01-4f09-b9ed-516db0bd326f", + "metadata": {}, + "source": [ + "### LangSmith\n", + "\n", + "Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with [LangSmith](https://docs.smith.langchain.com).\n", + "\n", + "Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\n", + "\n", + "\n", + "```bash\n", + "export LANGCHAIN_TRACING_V2=true\n", + "export LANGCHAIN_API_KEY=YOUR_KEY\n", + "\n", + "# Reduce tracing latency if you are not in a serverless environment\n", + "# export LANGCHAIN_CALLBACKS_BACKGROUND=true\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "fa6ba684-26cf-4860-904e-a4d51380c134", + "metadata": {}, + "source": [ + "## Chains {#chains}\n" + ] + }, + { + "cell_type": "markdown", + "id": "7d2cf4ef", + "metadata": {}, + "source": [ + "\n", + "Let's first revisit the Q&A app we built over the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng in the [RAG tutorial](/docs/tutorials/rag)." + ] + }, + { + "cell_type": "markdown", + "id": "646840fb-5212-48ea-8bc7-ec7be5ec727e", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "<ChatModelTabs customVarName=\"llm\" />\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb58f273-2111-4a9b-8932-9b64c95030c8", + "metadata": {}, + "outputs": [], + "source": [ + "// @lc-docs-hide-cell\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const llm = new ChatOpenAI({ model: \"gpt-4o\" });" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "820244ae-74b4-4593-b392-822979dd91b8", + "metadata": {}, + "outputs": [], + "source": [ + "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", + "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n", + "import { createRetrievalChain } from \"langchain/chains/retrieval\";\n", + "import { createStuffDocumentsChain } from \"langchain/chains/combine_documents\";\n", + "\n", + "// 1. 
Load, chunk and index the contents of the blog to create a retriever.\n", + "const loader = new CheerioWebBaseLoader(\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", + " {\n", + " selector: \".post-content, .post-title, .post-header\"\n", + " }\n", + ");\n", + "const docs = await loader.load();\n", + "\n", + "const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });\n", + "const splits = await textSplitter.splitDocuments(docs);\n", + "const vectorstore = await MemoryVectorStore.fromDocuments(splits, new OpenAIEmbeddings());\n", + "const retriever = vectorstore.asRetriever();\n", + "\n", + "// 2. Incorporate the retriever into a question-answering chain.\n", + "const systemPrompt = \n", + " \"You are an assistant for question-answering tasks. \" +\n", + " \"Use the following pieces of retrieved context to answer \" +\n", + " \"the question. If you don't know the answer, say that you \" +\n", + " \"don't know. Use three sentences maximum and keep the \" +\n", + " \"answer concise.\" +\n", + " \"\\n\\n\" +\n", + " \"{context}\";\n", + "\n", + "const prompt = ChatPromptTemplate.fromMessages([\n", + " [\"system\", systemPrompt],\n", + " [\"human\", \"{input}\"],\n", + "]);\n", + "\n", + "const questionAnswerChain = await createStuffDocumentsChain({\n", + " llm,\n", + " prompt,\n", + "});\n", + "\n", + "const ragChain = await createRetrievalChain({\n", + " retriever,\n", + " combineDocsChain: questionAnswerChain,\n", + "});" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bf55faaf-0d17-4b74-925d-c478b555f7b2", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Contextualizing the question\n", - "\n", - "First we'll need to define a sub-chain that takes historical messages and the latest user question, and reformulates the question if it makes reference to any information in the historical information.\n", - "\n", - "We'll use a prompt that includes a `MessagesPlaceholder` variable under the name \"chat_history\". This allows us to pass in a list of Messages to the prompt using the \"chat_history\" input key, and these messages will be inserted after the system message and before the human message containing the latest question." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Task decomposition involves breaking down large and complex tasks into smaller, more manageable subgoals or steps. This approach helps agents or models efficiently handle intricate tasks by simplifying them into easier components. Task decomposition can be achieved through techniques like Chain of Thought, Tree of Thoughts, or by using task-specific instructions and human input.\n" + ] + } + ], + "source": [ + "const response = await ragChain.invoke({ input: \"What is Task Decomposition?\" });\n", + "console.log(response.answer);" + ] + }, + { + "cell_type": "markdown", + "id": "187404c7-db47-49c5-be29-9ecb96dc9afa", + "metadata": {}, + "source": [ + "Note that we have used the built-in chain constructors `createStuffDocumentsChain` and `createRetrievalChain`, so that the basic ingredients to our solution are:\n", + "\n", + "1. retriever;\n", + "2. prompt;\n", + "3. LLM.\n", + "\n", + "This will simplify the process of incorporating chat history.\n", + "\n", + "### Adding chat history\n", + "\n", + "The chain we have built uses the input query directly to retrieve relevant context. 
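Since `createRetrievalChain` passes its intermediate outputs through, a quick way to sanity-check retrieval is to inspect `response.context`, which holds the `Document` objects that were handed to the prompt. A minimal sketch, reusing the `ragChain` and `response` from the cells above (the property names follow the output schema described later in this guide):

```typescript
// Sketch only: inspect the documents the retrieval chain passed through.
// `response` is the result of `ragChain.invoke({ input: "What is Task Decomposition?" })`.
console.log(response.context.length); // number of retrieved Document objects
console.log(response.context[0].pageContent.slice(0, 200)); // preview the top hit
console.log(response.context[0].metadata); // source metadata for that document
```

Each of those documents was retrieved using the raw input query alone.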
But in a conversational setting, the user query might require conversational context to be understood. For example, consider this exchange:\n", + "\n", + "> Human: \"What is Task Decomposition?\"\n", + ">\n", + "> AI: \"Task decomposition involves breaking down complex tasks into smaller and simpler steps to make them more manageable for an agent or model.\"\n", + ">\n", + "> Human: \"What are common ways of doing it?\"\n", + "\n", + "In order to answer the second question, our system needs to understand that \"it\" refers to \"Task Decomposition.\"\n", + "\n", + "We'll need to update two things about our existing app:\n", + "\n", + "1. **Prompt**: Update our prompt to support historical messages as an input.\n", + "2. **Contextualizing questions**: Add a sub-chain that takes the latest user question and reformulates it in the context of the chat history. This can be thought of simply as building a new \"history aware\" retriever. Whereas before we had:\n", + " - `query` -> `retriever` \n", + " Now we will have:\n", + " - `(query, conversation history)` -> `LLM` -> `rephrased query` -> `retriever`" + ] + }, + { + "cell_type": "markdown", + "id": "776ae958-cbdc-4471-8669-c6087436f0b5", + "metadata": {}, + "source": [ + "#### Contextualizing the question\n", + "\n", + "First we'll need to define a sub-chain that takes historical messages and the latest user question, and reformulates the question if it makes reference to any information in the historical information.\n", + "\n", + "We'll use a prompt that includes a `MessagesPlaceholder` variable under the name \"chat_history\". This allows us to pass in a list of Messages to the prompt using the \"chat_history\" input key, and these messages will be inserted after the system message and before the human message containing the latest question.\n", + "\n", + "Note that we leverage a helper function [createHistoryAwareRetriever](https://api.js.langchain.com/functions/langchain.chains_history_aware_retriever.createHistoryAwareRetriever.html) for this step, which manages the case where `chat_history` is empty, and otherwise applies `prompt.pipe(llm).pipe(new StringOutputParser()).pipe(retriever)` in sequence.\n", + "\n", + "`createHistoryAwareRetriever` constructs a chain that accepts keys `input` and `chat_history` as input, and has the same output schema as a retriever." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2b685428-8b82-4af1-be4f-7232c5d55b73", + "metadata": {}, + "outputs": [], + "source": [ + "import { createHistoryAwareRetriever } from \"langchain/chains/history_aware_retriever\";\n", + "import { MessagesPlaceholder } from \"@langchain/core/prompts\";\n", + "\n", + "const contextualizeQSystemPrompt = \n", + " \"Given a chat history and the latest user question \" +\n", + " \"which might reference context in the chat history, \" +\n", + " \"formulate a standalone question which can be understood \" +\n", + " \"without the chat history. 
Do NOT answer the question, \" +\n", + " \"just reformulate it if needed and otherwise return it as is.\";\n", + "\n", + "const contextualizeQPrompt = ChatPromptTemplate.fromMessages([\n", + " [\"system\", contextualizeQSystemPrompt],\n", + " new MessagesPlaceholder(\"chat_history\"),\n", + " [\"human\", \"{input}\"],\n", + "]);\n", + "\n", + "const historyAwareRetriever = await createHistoryAwareRetriever({\n", + " llm,\n", + " retriever,\n", + " rephrasePrompt: contextualizeQPrompt,\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "42a47168-4a1f-4e39-bd2d-d5b03609a243", + "metadata": {}, + "source": [ + "This chain prepends a rephrasing of the input query to our retriever, so that the retrieval incorporates the context of the conversation.\n", + "\n", + "Now we can build our full QA chain. This is as simple as updating the retriever to be our new `historyAwareRetriever`.\n", + "\n", + "Again, we will use [createStuffDocumentsChain](https://api.js.langchain.com/functions/langchain.chains_combine_documents.createStuffDocumentsChain.html) to generate a `questionAnswerChain2`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer. A more detailed explaination is over [here](/docs/tutorials/rag/#built-in-chains)\n", + "\n", + "We build our final `ragChain2` with [createRetrievalChain](https://api.js.langchain.com/functions/langchain.chains_retrieval.createRetrievalChain.html). This chain applies the `historyAwareRetriever` and `questionAnswerChain2` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "66f275f3-ddef-4678-b90d-ee64576878f9", + "metadata": {}, + "outputs": [], + "source": [ + "const qaPrompt = ChatPromptTemplate.fromMessages([\n", + " [\"system\", systemPrompt],\n", + " new MessagesPlaceholder(\"chat_history\"),\n", + " [\"human\", \"{input}\"],\n", + "]);\n", + "\n", + "const questionAnswerChain2 = await createStuffDocumentsChain({\n", + " llm,\n", + " prompt: qaPrompt,\n", + "});\n", + "\n", + "const ragChain2 = await createRetrievalChain({\n", + " retriever: historyAwareRetriever,\n", + " combineDocsChain: questionAnswerChain2,\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "1ba1ae56-7ecb-4563-b792-50a1a5042df3", + "metadata": {}, + "source": [ + "Let's try this. Below we ask a question and a follow-up question that requires contextualization to return a sensible response. Because our chain includes a `\"chat_history\"` input, the caller needs to manage the chat history. We can achieve this by appending input and output messages to a list:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0005810b-1b95-4666-a795-08d80e478b83", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n", - "\n", - "const contextualizeQSystemPrompt = `Given a chat history and the latest user question\n", - "which might reference context in the chat history, formulate a standalone question\n", - "which can be understood without the chat history. 
Do NOT answer the question,\n", - "just reformulate it if needed and otherwise return it as is.`;\n", - "\n", - "const contextualizeQPrompt = ChatPromptTemplate.fromMessages([\n", - " [\"system\", contextualizeQSystemPrompt],\n", - " new MessagesPlaceholder(\"chat_history\"),\n", - " [\"human\", \"{question}\"]\n", - "]);\n", - "const contextualizeQChain = contextualizeQPrompt.pipe(llm).pipe(new StringOutputParser());" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Common ways of doing Task Decomposition include:\n", + "1. Using simple prompting with an LLM, such as asking it to outline steps or subgoals for a task.\n", + "2. Employing task-specific instructions, like \"Write a story outline\" for writing a novel.\n", + "3. Incorporating human inputs for guidance.\n", + "Additionally, advanced approaches like Chain of Thought (CoT) and Tree of Thoughts (ToT) can further refine the process, and using an external classical planner with PDDL (as in LLM+P) is another option.\n" + ] + } + ], + "source": [ + "import { BaseMessage, HumanMessage, AIMessage } from \"@langchain/core/messages\";\n", + "\n", + "let chatHistory: BaseMessage[] = [];\n", + "\n", + "const question = \"What is Task Decomposition?\";\n", + "const aiMsg1 = await ragChain2.invoke({ input: question, chat_history: chatHistory });\n", + "chatHistory = chatHistory.concat([\n", + " new HumanMessage(question),\n", + " new AIMessage(aiMsg1.answer),\n", + "]);\n", + "\n", + "const secondQuestion = \"What are common ways of doing it?\";\n", + "const aiMsg2 = await ragChain2.invoke({ input: secondQuestion, chat_history: chatHistory });\n", + "\n", + "console.log(aiMsg2.answer);" + ] + }, + { + "cell_type": "markdown", + "id": "53a662c2-f38b-45f9-95c4-66de15637614", + "metadata": {}, + "source": [ + "#### Stateful management of chat history\n", + "\n", + "Here we've gone over how to add application logic for incorporating historical outputs, but we're still manually updating the chat history and inserting it into each input. In a real Q&A application we'll want some way of persisting chat history and some way of automatically inserting and updating it.\n", + "\n", + "For this we can use:\n", + "\n", + "- [BaseChatMessageHistory](https://api.js.langchain.com/classes/_langchain_core.chat_history.BaseChatMessageHistory.html): Store chat history.\n", + "- [RunnableWithMessageHistory](/docs/how_to/message_history): Wrapper for an LCEL chain and a `BaseChatMessageHistory` that handles injecting chat history into inputs and updating it after each invocation.\n", + "\n", + "For a detailed walkthrough of how to use these classes together to create a stateful conversational chain, head to the [How to add message history (memory)](/docs/how_to/message_history) LCEL page.\n", + "\n", + "Instances of `RunnableWithMessageHistory` manage the chat history for you. They accept a config with a key (`\"sessionId\"` by default) that specifies what conversation history to fetch and prepend to the input, and append the output to the same conversation history. 
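Note that the quick example below returns a single shared in-memory history regardless of the session id; for a multi-session application you would typically key histories by `sessionId`, as the full example at the end of this section does. A minimal sketch of such a factory (the store and function names here are illustrative, not part of the original cells):

```typescript
import { ChatMessageHistory } from "langchain/stores/message/in_memory";
import { BaseChatMessageHistory } from "@langchain/core/chat_history";

// One in-memory history per session id, created lazily on first use.
const sessionHistories: Record<string, BaseChatMessageHistory> = {};

function getHistoryForSession(sessionId: string): BaseChatMessageHistory {
  if (!(sessionId in sessionHistories)) {
    sessionHistories[sessionId] = new ChatMessageHistory();
  }
  return sessionHistories[sessionId];
}
```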
Below is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9c3fb176-8d6a-4dc7-8408-6a22c5f7cc72", + "metadata": {}, + "outputs": [], + "source": [ + "import { RunnableWithMessageHistory } from \"@langchain/core/runnables\";\n", + "import { ChatMessageHistory } from \"langchain/stores/message/in_memory\";\n", + "\n", + "const demoEphemeralChatMessageHistoryForChain = new ChatMessageHistory();\n", + "\n", + "const conversationalRagChain = new RunnableWithMessageHistory({\n", + " runnable: ragChain2,\n", + " getMessageHistory: (_sessionId) => demoEphemeralChatMessageHistoryForChain,\n", + " inputMessagesKey: \"input\",\n", + " historyMessagesKey: \"chat_history\",\n", + " outputMessagesKey: \"answer\",\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1046c92f-21b3-4214-907d-92878d8cba23", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using this chain we can ask follow-up questions that reference past messages and have them reformulated into standalone questions:" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Task Decomposition involves breaking down complicated tasks into smaller, more manageable subgoals. Techniques such as the Chain of Thought (CoT) and Tree of Thoughts extend this by decomposing problems into multiple thought steps and exploring multiple reasoning possibilities at each step. LLMs can perform task decomposition using simple prompts, task-specific instructions, or human inputs, and some approaches like LLM+P involve using external classical planners.\n" + ] + } + ], + "source": [ + "const result1 = await conversationalRagChain.invoke(\n", + " { input: \"What is Task Decomposition?\" },\n", + " { configurable: { sessionId: \"abc123\" } }\n", + ");\n", + "console.log(result1.answer);" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0e89c75f-7ad7-4331-a2fe-57579eb8f840", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\u001b[32m'What is the definition of \"large\" in the context of a language model?'\u001b[39m" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import { AIMessage, HumanMessage } from \"@langchain/core/messages\";\n", - "\n", - "await contextualizeQChain.invoke({\n", - " chat_history: [\n", - " new HumanMessage(\"What does LLM stand for?\"),\n", - " new AIMessage(\"Large language model\") \n", - " ],\n", - " question: \"What is meant by large\",\n", - "})" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Common ways of doing task decomposition include:\n", + "\n", + "1. Using simple prompting with an LLM, such as \"Steps for XYZ.\\n1.\" or \"What are the subgoals for achieving XYZ?\"\n", + "2. Utilizing task-specific instructions, like \"Write a story outline.\" for writing a novel.\n", + "3. Incorporating human inputs to guide and refine the decomposition process. 
\n", + "\n", + "Additionally, the LLM+P approach utilizes an external classical planner, involving PDDL to describe and plan complex tasks.\n" + ] + } + ], + "source": [ + "const result2 = await conversationalRagChain.invoke(\n", + " { input: \"What are common ways of doing it?\" },\n", + " { configurable: { sessionId: \"abc123\" } }\n", + ");\n", + "console.log(result2.answer);" + ] + }, + { + "cell_type": "markdown", + "id": "0ab1ded4-76d9-453f-9b9b-db9a4560c737", + "metadata": {}, + "source": [ + "### Tying it together" + ] + }, + { + "cell_type": "markdown", + "id": "8a08a5ea-df5b-4547-93c6-2a3940dd5c3e", + "metadata": {}, + "source": [ + "![](../../static/img/conversational_retrieval_chain.png)\n", + "\n", + "For convenience, we tie together all of the necessary steps in a single code cell:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "71c32048-1a41-465f-a9e2-c4affc332fd9", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Chain with chat history\n", - "\n", - "And now we can build our full QA chain. \n", - "\n", - "Notice we add some routing functionality to only run the \"condense question chain\" when our chat history isn't empty. Here we're taking advantage of the fact that if a function in an LCEL chain returns another chain, that chain will itself be invoked." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "{ input: 'What is Task Decomposition?' }\n", + "----\n", + "{ chat_history: [] }\n", + "----\n", + "{\n", + " context: [\n", + " Document {\n", + " pageContent: 'Fig. 1. Overview of a LLM-powered autonomous agent system.\\n' +\n", + " 'Component One: Planning#\\n' +\n", + " 'A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\n' +\n", + " 'Task Decomposition#\\n' +\n", + " 'Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\n' +\n", + " 'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.',\n", + " metadata: [Object],\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\\n' +\n", + " 'Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. 
In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\n' +\n", + " 'Self-Reflection#',\n", + " metadata: [Object],\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Planning\\n' +\n", + " '\\n' +\n", + " 'Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\n' +\n", + " 'Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'Memory\\n' +\n", + " '\\n' +\n", + " 'Short-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\n' +\n", + " 'Long-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n' +\n", + " '\\n' +\n", + " '\\n' +\n", + " 'Tool use\\n' +\n", + " '\\n' +\n", + " 'The agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.',\n", + " metadata: [Object],\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Resources:\\n' +\n", + " '1. Internet access for searches and information gathering.\\n' +\n", + " '2. Long Term memory management.\\n' +\n", + " '3. GPT-3.5 powered Agents for delegation of simple tasks.\\n' +\n", + " '4. File output.\\n' +\n", + " '\\n' +\n", + " 'Performance Evaluation:\\n' +\n", + " '1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n' +\n", + " '2. Constructively self-criticize your big-picture behavior constantly.\\n' +\n", + " '3. Reflect on past decisions and strategies to refine your approach.\\n' +\n", + " '4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.',\n", + " metadata: [Object],\n", + " id: undefined\n", + " }\n", + " ]\n", + "}\n", + "----\n", + "{ answer: '' }\n", + "----\n", + "{ answer: 'Task' }\n", + "----\n", + "{ answer: ' decomposition' }\n", + "----\n", + "{ answer: ' involves' }\n", + "----\n", + "{ answer: ' breaking' }\n", + "----\n", + "{ answer: ' down' }\n", + "----\n", + "{ answer: ' a' }\n", + "----\n", + "{ answer: ' complex' }\n", + "----\n", + "{ answer: ' task' }\n", + "----\n", + "{ answer: ' into' }\n", + "----\n", + "{ answer: ' smaller' }\n", + "----\n", + "{ answer: ' and' }\n", + "----\n", + "{ answer: ' more' }\n", + "----\n", + "{ answer: ' manageable' }\n", + "----\n", + "{ answer: ' sub' }\n", + "----\n", + "{ answer: 'goals' }\n", + "----\n", + "{ answer: ' or' }\n", + "----\n", + "{ answer: ' steps' }\n", + "----\n", + "{ answer: '.' 
}\n", + "----\n", + "{ answer: ' This' }\n", + "----\n", + "{ answer: ' process' }\n", + "----\n", + "{ answer: ' allows' }\n", + "----\n", + "{ answer: ' an' }\n", + "----\n", + "{ answer: ' agent' }\n", + "----\n", + "{ answer: ' or' }\n", + "----\n", + "{ answer: ' model' }\n", + "----\n", + "{ answer: ' to' }\n", + "----\n", + "{ answer: ' efficiently' }\n", + "----\n", + "{ answer: ' handle' }\n", + "----\n", + "{ answer: ' intricate' }\n", + "----\n", + "{ answer: ' tasks' }\n", + "----\n", + "{ answer: ' by' }\n", + "----\n", + "{ answer: ' dividing' }\n", + "----\n", + "{ answer: ' them' }\n", + "----\n", + "{ answer: ' into' }\n", + "----\n", + "{ answer: ' simpler' }\n", + "----\n", + "{ answer: ' components' }\n", + "----\n", + "{ answer: '.' }\n", + "----\n", + "{ answer: ' Task' }\n", + "----\n", + "{ answer: ' decomposition' }\n", + "----\n", + "{ answer: ' can' }\n", + "----\n", + "{ answer: ' be' }\n", + "----\n", + "{ answer: ' achieved' }\n", + "----\n", + "{ answer: ' through' }\n", + "----\n", + "{ answer: ' techniques' }\n", + "----\n", + "{ answer: ' like' }\n", + "----\n", + "{ answer: ' Chain' }\n", + "----\n", + "{ answer: ' of' }\n", + "----\n", + "{ answer: ' Thought' }\n", + "----\n", + "{ answer: ',' }\n", + "----\n", + "{ answer: ' Tree' }\n", + "----\n", + "{ answer: ' of' }\n", + "----\n", + "{ answer: ' Thoughts' }\n", + "----\n", + "{ answer: ',' }\n", + "----\n", + "{ answer: ' or' }\n", + "----\n", + "{ answer: ' by' }\n", + "----\n", + "{ answer: ' using' }\n", + "----\n", + "{ answer: ' task' }\n", + "----\n", + "{ answer: '-specific' }\n", + "----\n", + "{ answer: ' instructions' }\n", + "----\n", + "{ answer: '.' }\n", + "----\n", + "{ answer: '' }\n", + "----\n", + "{ answer: '' }\n", + "----\n" + ] + } + ], + "source": [ + "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", + "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n", + "import { OpenAIEmbeddings, ChatOpenAI } from \"@langchain/openai\";\n", + "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n", + "import { createHistoryAwareRetriever } from \"langchain/chains/history_aware_retriever\";\n", + "import { createStuffDocumentsChain } from \"langchain/chains/combine_documents\";\n", + "import { createRetrievalChain } from \"langchain/chains/retrieval\";\n", + "import { RunnableWithMessageHistory } from \"@langchain/core/runnables\";\n", + "import { ChatMessageHistory } from \"langchain/stores/message/in_memory\";\n", + "import { BaseChatMessageHistory } from \"@langchain/core/chat_history\";\n", + "\n", + "const llm2 = new ChatOpenAI({ model: \"gpt-3.5-turbo\", temperature: 0 });\n", + "\n", + "// Construct retriever\n", + "const loader2 = new CheerioWebBaseLoader(\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", + " {\n", + " selector: \".post-content, .post-title, .post-header\"\n", + " }\n", + ");\n", + "\n", + "const docs2 = await loader2.load();\n", + "\n", + "const textSplitter2 = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });\n", + "const splits2 = await textSplitter2.splitDocuments(docs2);\n", + "const vectorstore2 = await MemoryVectorStore.fromDocuments(splits2, new OpenAIEmbeddings());\n", + "const retriever2 = vectorstore2.asRetriever();\n", + "\n", + "// Contextualize question\n", + "const contextualizeQSystemPrompt2 = \n", + " \"Given a chat history 
 and the latest user question \" +\n", + "  \"which might reference context in the chat history, \" +\n", + "  \"formulate a standalone question which can be understood \" +\n", + "  \"without the chat history. Do NOT answer the question, \" +\n", + "  \"just reformulate it if needed and otherwise return it as is.\";\n", + "\n", + "const contextualizeQPrompt2 = ChatPromptTemplate.fromMessages([\n", + "  [\"system\", contextualizeQSystemPrompt2],\n", + "  new MessagesPlaceholder(\"chat_history\"),\n", + "  [\"human\", \"{input}\"],\n", + "]);\n", + "\n", + "const historyAwareRetriever2 = await createHistoryAwareRetriever({\n", + "  llm: llm2,\n", + "  retriever: retriever2,\n", + "  rephrasePrompt: contextualizeQPrompt2\n", + "});\n", + "\n", + "// Answer question\n", + "const systemPrompt2 = \n", + "  \"You are an assistant for question-answering tasks. \" +\n", + "  \"Use the following pieces of retrieved context to answer \" +\n", + "  \"the question. If you don't know the answer, say that you \" +\n", + "  \"don't know. Use three sentences maximum and keep the \" +\n", + "  \"answer concise.\" +\n", + "  \"\\n\\n\" +\n", + "  \"{context}\";\n", + "\n", + "const qaPrompt2 = ChatPromptTemplate.fromMessages([\n", + "  [\"system\", systemPrompt2],\n", + "  new MessagesPlaceholder(\"chat_history\"),\n", + "  [\"human\", \"{input}\"],\n", + "]);\n", + "\n", + "const questionAnswerChain3 = await createStuffDocumentsChain({\n", + "  llm,\n", + "  prompt: qaPrompt2,\n", + "});\n", + "\n", + "const ragChain3 = await createRetrievalChain({\n", + "  retriever: historyAwareRetriever2,\n", + "  combineDocsChain: questionAnswerChain3,\n", + "});\n", + "\n", + "// Statefully manage chat history\n", + "const store2: Record<string, BaseChatMessageHistory> = {};\n", + "\n", + "function getSessionHistory2(sessionId: string): BaseChatMessageHistory {\n", + "  if (!(sessionId in store2)) {\n", + "    store2[sessionId] = new ChatMessageHistory();\n", + "  }\n", + "  return store2[sessionId];\n", + "}\n", + "\n", + "const conversationalRagChain2 = new RunnableWithMessageHistory({\n", + "  runnable: ragChain3,\n", + "  getMessageHistory: getSessionHistory2,\n", + "  inputMessagesKey: \"input\",\n", + "  historyMessagesKey: \"chat_history\",\n", + "  outputMessagesKey: \"answer\",\n", + "});\n", + "\n", + "// Example usage\n", + "const query2 = \"What is Task Decomposition?\";\n", + "\n", + "for await (const s of await conversationalRagChain2.stream(\n", + "  { input: query2 },\n", + "  { configurable: { sessionId: \"unique_session_id\" } }\n", + ")) {\n", + "  console.log(s);\n", + "  console.log(\"----\");\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "861da8ed-d890-4fdc-a3bf-30433db61e0d", + "metadata": {}, + "source": [ + "## Agents {#agents}\n", + "\n", + "Agents leverage the reasoning capabilities of LLMs to make decisions during execution. Using agents allows you to offload some discretion over the retrieval process. Although their behavior is less predictable than chains, they offer some advantages in this context:\n", + "\n", + "- Agents generate the input to the retriever directly, without necessarily needing us to explicitly build in contextualization, as we did above;\n", + "- Agents can execute multiple retrieval steps in service of a query, or refrain from executing a retrieval step altogether (e.g., in response to a generic greeting from a user).\n", + "\n", + "### Retrieval tool\n", + "\n", + "Agents can access \"tools\" and manage their execution. 
In this case, we will convert our retriever into a LangChain tool to be wielded by the agent:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "809cc747-2135-40a2-8e73-e4556343ee64", + "metadata": {}, + "outputs": [], + "source": [ + "import { createRetrieverTool } from \"langchain/tools/retriever\";\n", + "\n", + "const tool = createRetrieverTool(\n", + " retriever,\n", + " {\n", + " name: \"blog_post_retriever\",\n", + " description: \"Searches and returns excerpts from the Autonomous Agents blog post.\",\n", + " }\n", + ")\n", + "const tools = [tool]" + ] + }, + { + "cell_type": "markdown", + "id": "07dcb968-ed9a-458a-85e1-528cd28c6965", + "metadata": {}, + "source": [ + "Tools are LangChain [Runnables](/docs/concepts#langchain-expression-language-lcel), and implement the usual interface:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "931c4fe3-c603-4efb-9b37-5f7cbbb1cbbd", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\"\n", - "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n", - "import { formatDocumentsAsString } from \"langchain/util/document\";\n", - "\n", - "const qaSystemPrompt = `You are an assistant for question-answering tasks.\n", - "Use the following pieces of retrieved context to answer the question.\n", - "If you don't know the answer, just say that you don't know.\n", - "Use three sentences maximum and keep the answer concise.\n", - "\n", - "{context}`\n", - "\n", - "const qaPrompt = ChatPromptTemplate.fromMessages([\n", - " [\"system\", qaSystemPrompt],\n", - " new MessagesPlaceholder(\"chat_history\"),\n", - " [\"human\", \"{question}\"]\n", - "]);\n", - "\n", - "const contextualizedQuestion = (input: Record) => {\n", - " if (\"chat_history\" in input) {\n", - " return contextualizeQChain;\n", - " }\n", - " return input.question;\n", - "};\n", - "\n", - "const ragChain = RunnableSequence.from([\n", - " RunnablePassthrough.assign({\n", - " context: (input: Record) => {\n", - " if (\"chat_history\" in input) {\n", - " const chain = contextualizedQuestion(input);\n", - " return chain.pipe(retriever).pipe(formatDocumentsAsString);\n", - " }\n", - " return \"\";\n", - " },\n", - " }),\n", - " qaPrompt,\n", - " llm\n", - "])" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\n", + "Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\n", + "Self-Reflection#\n", + "\n", + "Fig. 1. 
Overview of a LLM-powered autonomous agent system.\n", + "Component One: Planning#\n", + "A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\n", + "Task Decomposition#\n", + "Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\n", + "Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\n", + "\n", + "(3) Task execution: Expert models execute on the specific tasks and log results.\n", + "Instruction:\n", + "\n", + "With the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\n", + "\n", + "Resources:\n", + "1. Internet access for searches and information gathering.\n", + "2. Long Term memory management.\n", + "3. GPT-3.5 powered Agents for delegation of simple tasks.\n", + "4. File output.\n", + "\n", + "Performance Evaluation:\n", + "1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n", + "2. Constructively self-criticize your big-picture behavior constantly.\n", + "3. Reflect on past decisions and strategies to refine your approach.\n", + "4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n" + ] + } + ], + "source": [ + "console.log(await tool.invoke({ query: \"task decomposition\" }))" + ] + }, + { + "cell_type": "markdown", + "id": "f77e0217-28be-4b8b-b4c4-9cc4ed5ec201", + "metadata": {}, + "source": [ + "### Agent constructor\n", + "\n", + "Now that we have defined the tools and the LLM, we can create the agent. We will be using [LangGraph](/docs/concepts/#langgraph) to construct the agent. \n", + "Currently we are using a high level interface to construct the agent, but the nice thing about LangGraph is that this high-level interface is backed by a low-level, highly controllable API in case you want to modify the agent logic." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "1726d151-4653-4c72-a187-a14840add526", + "metadata": {}, + "outputs": [], + "source": [ + "import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n", + "\n", + "const agentExecutor = createReactAgent({ llm, tools });" + ] + }, + { + "cell_type": "markdown", + "id": "6d5152ca-1c3b-4f58-bb28-f31c0be7ba66", + "metadata": {}, + "source": [ + "We can now try it out. 
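If you only want the final answer rather than each intermediate step, you can also `invoke` the agent directly; a minimal sketch, assuming the `agentExecutor` defined above:

```typescript
import { HumanMessage } from "@langchain/core/messages";

// Sketch only: a single non-streaming call to the prebuilt agent.
const result = await agentExecutor.invoke({
  messages: [new HumanMessage("What is Task Decomposition?")],
});

// The returned state contains the accumulated message list;
// the last entry is the agent's final response.
console.log(result.messages[result.messages.length - 1].content);
```

Streaming, as below, additionally surfaces each tool call and tool result as it happens.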
Note that so far it is not stateful (we still need to add in memory)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "170403a2-c914-41db-85d8-a2c381da112d", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "AIMessage {\n", - " lc_serializable: true,\n", - " lc_kwargs: {\n", - " content: \"Task decomposition is a technique used to break down complex tasks into smaller and more manageable \"... 278 more characters,\n", - " additional_kwargs: { function_call: undefined, tool_calls: undefined }\n", - " },\n", - " lc_namespace: [ \"langchain_core\", \"messages\" ],\n", - " content: \"Task decomposition is a technique used to break down complex tasks into smaller and more manageable \"... 278 more characters,\n", - " name: undefined,\n", - " additional_kwargs: { function_call: undefined, tool_calls: undefined }\n", - "}\n" - ] - }, - { - "data": { - "text/plain": [ - "AIMessage {\n", - " lc_serializable: \u001b[33mtrue\u001b[39m,\n", - " lc_kwargs: {\n", - " content: \u001b[32m\"Common ways of task decomposition include using prompting techniques like Chain of Thought (CoT) or \"\u001b[39m... 332 more characters,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m }\n", - " },\n", - " lc_namespace: [ \u001b[32m\"langchain_core\"\u001b[39m, \u001b[32m\"messages\"\u001b[39m ],\n", - " content: \u001b[32m\"Common ways of task decomposition include using prompting techniques like Chain of Thought (CoT) or \"\u001b[39m... 332 more characters,\n", - " name: \u001b[90mundefined\u001b[39m,\n", - " additional_kwargs: { function_call: \u001b[90mundefined\u001b[39m, tool_calls: \u001b[90mundefined\u001b[39m }\n", - "}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "let chat_history = [];\n", - "\n", - "const question = \"What is task decomposition?\";\n", - "const aiMsg = await ragChain.invoke({ question, chat_history });\n", - "console.log(aiMsg)\n", - "chat_history = chat_history.concat(aiMsg);\n", - "\n", - "const secondQuestion = \"What are common ways of doing it?\";\n", - "await ragChain.invoke({ question: secondQuestion, chat_history });" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABABtUmgD1ZlOHZd0nD9TR8yb3mMe\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_dWxEY41mg9VSLamVYHltsUxL\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 66,\n", + " \"totalTokens\": 85\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_3537616b13\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"blog_post_retriever\",\n", + " \"args\": {\n", + " \"query\": \"Task Decomposition\"\n", + " },\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_dWxEY41mg9VSLamVYHltsUxL\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 66,\n", + " \"output_tokens\": 19,\n", + " \"total_tokens\": 85\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " tools: {\n", + 
" messages: [\n", + " ToolMessage {\n", + " \"content\": \"Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\n\\nTask decomposition can be done (1) by LLM with simple prompting like \\\"Steps for XYZ.\\\\n1.\\\", \\\"What are the subgoals for achieving XYZ?\\\", (2) by using task-specific instructions; e.g. \\\"Write a story outline.\\\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. 
If inference results contain a file path, must tell the user the complete file path.\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.\",\n", + " \"name\": \"blog_post_retriever\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_call_id\": \"call_dWxEY41mg9VSLamVYHltsUxL\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABABuSj5FHmHFdeR2Pv7Cxcmq5aQz\",\n", + " \"content\": \"Task Decomposition is a technique that allows an agent to break down a complex task into smaller, more manageable subtasks or steps. The primary goal is to simplify the task to ensure efficient execution and better understanding. \\n\\n### Methods in Task Decomposition:\\n1. **Chain of Thought (CoT)**:\\n - **Description**: This technique involves instructing the model to “think step by step” to decompose hard tasks into smaller ones. It transforms large tasks into multiple manageable tasks, enhancing the model's performance and providing insight into its thinking process. \\n - **Example**: When given a complex problem, the model outlines sequential steps to reach a solution.\\n\\n2. **Tree of Thoughts**:\\n - **Description**: This extends CoT by exploring multiple reasoning possibilities at each step. The problem is decomposed into multiple thought steps, with several thoughts generated per step, forming a sort of decision tree.\\n - **Example**: For a given task, the model might consider various alternative actions at each stage, evaluating each before proceeding.\\n\\n3. **LLM with Prompts**:\\n - **Description**: Basic task decomposition can be done via simple prompts like \\\"Steps for XYZ\\\" or \\\"What are the subgoals for achieving XYZ?\\\" This can also be guided by task-specific instructions or human inputs when necessary.\\n - **Example**: Asking the model to list the subgoals for writing a novel might produce an outline broken down into chapters, character development, and plot points.\\n\\n4. **LLM+P**:\\n - **Description**: This approach involves outsourcing long-term planning to an external classical planner using Planning Domain Definition Language (PDDL). 
The task is translated into a PDDL problem by the model, planned using classical planning tools, and then translated back into natural language.\\n - **Example**: In robotics, translating a task into PDDL and then using a domain-specific planner to generate a sequence of actions.\\n\\n### Applications:\\n- **Planning**: Helps an agent plan tasks by breaking them into clear, manageable steps.\\n- **Self-Reflection**: Allows agents to reflect and refine their actions, learning from past mistakes to improve future performance.\\n- **Memory**: Utilizes short-term memory for immediate context and long-term memory for retaining and recalling information over extended periods.\\n- **Tool Use**: Enables the agent to call external APIs for additional information or capabilities not inherent in the model.\\n\\nIn essence, task decomposition leverages various methodologies to simplify complex tasks, ensuring better performance, improved reasoning, and effective task execution.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 522,\n", + " \"promptTokens\": 821,\n", + " \"totalTokens\": 1343\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 821,\n", + " \"output_tokens\": 522,\n", + " \"total_tokens\": 1343\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] + } + ], + "source": [ + "const query = \"What is Task Decomposition?\";\n", + "\n", + "for await (const s of await agentExecutor.stream(\n", + " { messages: [new HumanMessage(query)] }\n", + ")) {\n", + " console.log(s);\n", + " console.log(\"----\");\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "1df703b1-aad6-48fb-b6fa-703e32ea88b9", + "metadata": {}, + "source": [ + "LangGraph comes with built in persistence, so we don't need to use ChatMessageHistory! Rather, we can pass in a checkpointer to our LangGraph agent directly" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "04a3a664-3c3f-4cd1-9995-26662a52da7c", + "metadata": {}, + "outputs": [], + "source": [ + "import { MemorySaver } from \"@langchain/langgraph\";\n", + "\n", + "const memory = new MemorySaver();\n", + "\n", + "const agentExecutorWithMemory = createReactAgent({ llm, tools, checkpointSaver: memory });" + ] + }, + { + "cell_type": "markdown", + "id": "02026f78-338e-4d18-9f05-131e1dd59197", + "metadata": {}, + "source": [ + "This is all we need to construct a conversational RAG agent.\n", + "\n", + "Let's observe its behavior. Note that if we input a query that does not require a retrieval step, the agent does not execute one:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "d6d70833-b958-4cd7-9e27-29c1c08bb1b8", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "See the first [LastSmith trace here](https://smith.langchain.com/public/527981c6-5018-4b68-a11a-ebcde77843e7/r) and the [second trace here](https://smith.langchain.com/public/7b97994a-ab9f-4bf3-a2e4-abb609e5610a/r)" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABACGc1vDPUSHYN7YVkuUMwpKR20P\",\n", + " \"content\": \"Hello, Bob! 
How can I assist you today?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 12,\n", + " \"promptTokens\": 64,\n", + " \"totalTokens\": 76\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_e375328146\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 64,\n", + " \"output_tokens\": 12,\n", + " \"total_tokens\": 76\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] + } + ], + "source": [ + "const config = { configurable: { thread_id: \"abc123\" } };\n", + "\n", + "for await (const s of await agentExecutorWithMemory.stream(\n", + " { messages: [new HumanMessage(\"Hi! I'm bob\")] },\n", + " config\n", + ")) {\n", + " console.log(s);\n", + " console.log(\"----\");\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "a7928865-3dd6-4d36-abc6-2a30de770d09", + "metadata": {}, + "source": [ + "Further, if we input a query that does require a retrieval step, the agent generates the input to the tool:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "e2c570ae-dd91-402c-8693-ae746de63b16", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we've gone over how to add application logic for incorporating historical outputs, but we're still manually updating the chat history and inserting it into each input. In a real Q&A application we'll want some way of persisting chat history and some way of automatically inserting and updating it.\n", - "\n", - "For this we can use:\n", - "\n", - "- [BaseChatMessageHistory](https://api.js.langchain.com/classes/langchain_core.chat_history.BaseChatMessageHistory.html): Store chat history.\n", - "- [RunnableWithMessageHistory](/docs/how_to/message_history/): Wrapper for an LCEL chain and a `BaseChatMessageHistory` that handles injecting chat history into inputs and updating it after each invocation.\n", - "\n", - "For a detailed walkthrough of how to use these classes together to create a stateful conversational chain, head to the [How to add message history (memory)](/docs/how_to/message_history/) LCEL page." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABACI6WN7hkfJjFhIUBGt3TswtPOv\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_Lys2G4TbOMJ6RBuVvKnFSK4V\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 19,\n", + " \"promptTokens\": 89,\n", + " \"totalTokens\": 108\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_f82f5b050c\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"blog_post_retriever\",\n", + " \"args\": {\n", + " \"query\": \"Task Decomposition\"\n", + " },\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_Lys2G4TbOMJ6RBuVvKnFSK4V\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 89,\n", + " \"output_tokens\": 19,\n", + " \"total_tokens\": 108\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " tools: {\n", + " messages: [\n", + " ToolMessage {\n", + " \"content\": \"Fig. 1. 
Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\n\\nTask decomposition can be done (1) by LLM with simple prompting like \\\"Steps for XYZ.\\\\n1.\\\", \\\"What are the subgoals for achieving XYZ?\\\", (2) by using task-specific instructions; e.g. \\\"Write a story outline.\\\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\n\\n(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. 
If inference results contain a file path, must tell the user the complete file path.\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.\",\n", + " \"name\": \"blog_post_retriever\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_call_id\": \"call_Lys2G4TbOMJ6RBuVvKnFSK4V\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABACJu56eYSAyyMNaV9UEUwHS8vRu\",\n", + " \"content\": \"Task Decomposition is a method used to break down complicated tasks into smaller, more manageable steps. This approach leverages the \\\"Chain of Thought\\\" (CoT) technique, which prompts models to \\\"think step by step\\\" to enhance performance on complex tasks. Here’s a summary of the key concepts related to Task Decomposition:\\n\\n1. **Chain of Thought (CoT):**\\n - A prompting technique that encourages models to decompose hard tasks into simpler steps, transforming big tasks into multiple manageable sub-tasks.\\n - CoT helps to provide insights into the model’s thinking process.\\n\\n2. **Tree of Thoughts:**\\n - An extension of CoT, this approach explores multiple reasoning paths at each step.\\n - It creates a tree structure by generating multiple thoughts per step, and uses search methods like breadth-first search (BFS) or depth-first search (DFS) to explore these thoughts.\\n - Each state is evaluated by a classifier or majority vote.\\n\\n3. **Methods for Task Decomposition:**\\n - Simple prompting such as instructing with phrases like \\\"Steps for XYZ: 1., 2., 3.\\\" or \\\"What are the subgoals for achieving XYZ?\\\".\\n - Using task-specific instructions like \\\"Write a story outline\\\" for specific tasks such as writing a novel.\\n - Incorporating human inputs for better granularity.\\n\\n4. **LLM+P (Long-horizon Planning):**\\n - A method that involves using an external classical planner for long-horizon planning.\\n - The process involves translating the problem into a Planning Domain Definition Language (PDDL) problem, using a classical planner to generate a PDDL plan, and then translating it back into natural language.\\n\\nTask Decomposition is essential in planning complex tasks, allowing for efficient handling by breaking them into sub-tasks and sub-goals. 
This process is integral to the functioning of autonomous agent systems and enhances their capability to execute intricate tasks effectively.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 396,\n", + " \"promptTokens\": 844,\n", + " \"totalTokens\": 1240\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_9f2bfdaa89\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 844,\n", + " \"output_tokens\": 396,\n", + " \"total_tokens\": 1240\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] } - ], - "metadata": { - "kernelspec": { - "display_name": "Deno", - "language": "typescript", - "name": "deno" - }, - "language_info": { - "file_extension": ".ts", - "mimetype": "text/x.typescript", - "name": "typescript", - "nb_converter": "script", - "pygments_lexer": "typescript", - "version": "5.3.3" + ], + "source": [ + "for await (const s of await agentExecutorWithMemory.stream(\n", + " { messages: [new HumanMessage(query)] },\n", + " config\n", + ")) {\n", + " console.log(s);\n", + " console.log(\"----\");\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "26eaae33-3c4e-49fc-9fc6-db8967e25579", + "metadata": {}, + "source": [ + "Above, instead of inserting our query verbatim into the tool, the agent stripped unnecessary words like \"what\" and \"is\".\n", + "\n", + "This same principle allows the agent to use the context of the conversation when necessary:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "570d8c68-136e-4ba5-969a-03ba195f6118", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABACPZzSugzrREQRO4mVQfI3cQOeL\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_5nSZb396Tcg73Pok6Bx1XV8b\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 22,\n", + " \"promptTokens\": 1263,\n", + " \"totalTokens\": 1285\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_9f2bfdaa89\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"blog_post_retriever\",\n", + " \"args\": {\n", + " \"query\": \"common ways of doing task decomposition\"\n", + " },\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_5nSZb396Tcg73Pok6Bx1XV8b\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 1263,\n", + " \"output_tokens\": 22,\n", + " \"total_tokens\": 1285\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " tools: {\n", + " messages: [\n", + " ToolMessage {\n", + " \"content\": \"Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. 
CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\n\\nTask decomposition can be done (1) by LLM with simple prompting like \\\"Steps for XYZ.\\\\n1.\\\", \\\"What are the subgoals for achieving XYZ?\\\", (2) by using task-specific instructions; e.g. \\\"Write a story outline.\\\" for writing a novel, or (3) with human inputs.\\nAnother quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains.\\nSelf-Reflection#\\n\\nPlanning\\n\\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\\n\\n\\nMemory\\n\\nShort-term memory: I would consider all the in-context learning (See Prompt Engineering) as utilizing short-term memory of the model to learn.\\nLong-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.\\n\\n\\nTool use\\n\\nThe agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more.\\n\\nResources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. 
Aim to complete tasks in the least number of steps.\",\n", + " \"name\": \"blog_post_retriever\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_call_id\": \"call_5nSZb396Tcg73Pok6Bx1XV8b\"\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n", + "{\n", + " agent: {\n", + " messages: [\n", + " AIMessage {\n", + " \"id\": \"chatcmpl-ABACQt9pT5dKCTaGQpVawcmCCWdET\",\n", + " \"content\": \"According to the blog post, common ways of performing Task Decomposition include:\\n\\n1. **Using Large Language Models (LLMs) with Simple Prompting:**\\n - Providing clear and structured prompts such as \\\"Steps for XYZ: 1., 2., 3.\\\" or asking \\\"What are the subgoals for achieving XYZ?\\\"\\n - This allows the model to break down the tasks step-by-step.\\n\\n2. **Task-Specific Instructions:**\\n - Employing specific instructions tailored to the task at hand, for example, \\\"Write a story outline\\\" for writing a novel.\\n - These instructions guide the model in decomposing the task appropriately.\\n\\n3. **Involving Human Inputs:**\\n - Integrating insights and directives from humans to aid in the decomposition process.\\n - This can ensure that the breakdown is comprehensive and accurately reflects the nuances of the task.\\n\\n4. **LLM+P Approach for Long-Horizon Planning:**\\n - Utilizing an external classical planner by translating the problem into Planning Domain Definition Language (PDDL).\\n - The process involves:\\n 1. Translating the problem into “Problem PDDL”.\\n 2. Requesting a classical planner to generate a PDDL plan based on an existing “Domain PDDL”.\\n 3. Translating the PDDL plan back into natural language.\\n\\nThese methods enable effective management and execution of complex tasks by transforming them into simpler, more manageable components.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 292,\n", + " \"promptTokens\": 2010,\n", + " \"totalTokens\": 2302\n", + " },\n", + " \"finish_reason\": \"stop\",\n", + " \"system_fingerprint\": \"fp_9f2bfdaa89\"\n", + " },\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 2010,\n", + " \"output_tokens\": 292,\n", + " \"total_tokens\": 2302\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "----\n" + ] } + ], + "source": [ + "const query3 = \"What according to the blog post are common ways of doing it? redo the search\";\n", + "\n", + "for await (const s of await agentExecutorWithMemory.stream(\n", + " { messages: [new HumanMessage(query3)] },\n", + " config\n", + ")) {\n", + " console.log(s);\n", + " console.log(\"----\");\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "f2724616-c106-4e15-a61a-3077c535f692", + "metadata": {}, + "source": [ + "Note that the agent was able to infer that \"it\" in our query refers to \"task decomposition\", and generated a reasonable search query as a result-- in this case, \"common ways of task decomposition\"." 
+ ] + }, + { + "cell_type": "markdown", + "id": "1cf87847-23bb-4672-b41c-12ad9cf81ed4", + "metadata": {}, + "source": [ + "### Tying it together\n", + "\n", + "For convenience, we tie together all of the necessary steps in a single code cell:" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "b1d2b4d4-e604-497d-873d-d345b808578e", + "metadata": {}, + "outputs": [], + "source": [ + "import { ChatOpenAI, OpenAIEmbeddings } from \"@langchain/openai\";\n", + "import { MemorySaver } from \"@langchain/langgraph\";\n", + "import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n", + "import { CheerioWebBaseLoader } from \"@langchain/community/document_loaders/web/cheerio\";\n", + "import { RecursiveCharacterTextSplitter } from \"langchain/text_splitter\";\n", + "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n", + "import { createRetrieverTool } from \"langchain/tools/retriever\";\n", + "\n", + "const memory3 = new MemorySaver();\n", + "const llm3 = new ChatOpenAI({ model: \"gpt-4o\", temperature: 0 });\n", + "\n", + "// Construct retriever\n", + "const loader3 = new CheerioWebBaseLoader(\n", + " \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", + " {\n", + " selector: \".post-content, .post-title, .post-header\"\n", + " }\n", + ");\n", + "\n", + "const docs3 = await loader3.load();\n", + "\n", + "const textSplitter3 = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });\n", + "const splits3 = await textSplitter3.splitDocuments(docs3);\n", + "const vectorstore3 = await MemoryVectorStore.fromDocuments(splits3, new OpenAIEmbeddings());\n", + "const retriever3 = vectorstore3.asRetriever();\n", + "\n", + "// Build retriever tool\n", + "const tool3 = createRetrieverTool(\n", + " retriever3,\n", + " {\n", + " name: \"blog_post_retriever\",\n", + " description: \"Searches and returns excerpts from the Autonomous Agents blog post.\",\n", + " }\n", + ");\n", + "const tools3 = [tool3];\n", + "\n", + "const agentExecutor3 = createReactAgent({ llm: llm3, tools: tools3, checkpointSaver: memory3 });" + ] + }, + { + "cell_type": "markdown", + "id": "cd6bf4f4-74f4-419d-9e26-f0ed83cf05fa", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "We've covered the steps to build a basic conversational Q&A application:\n", + "\n", + "- We used chains to build a predictable application that generates search queries for each user input;\n", + "- We used agents to build an application that \"decides\" when and how to generate search queries.\n", + "\n", + "To explore different types of retrievers and retrieval strategies, visit the [retrievers](/docs/how_to/#retrievers) section of the how-to guides.\n", + "\n", + "For a detailed walkthrough of LangChain's conversation memory abstractions, visit the [How to add message history (memory)](/docs/how_to/message_history) LCEL page."
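Since the consolidated "Tying it together" cell above only builds `agentExecutor3` without calling it, here is a small, optional usage sketch in the same style as the streaming calls earlier in this guide. The question text and the `thread_id` value are illustrative placeholders, not part of the original cell.

```typescript
// Usage sketch: stream a question through the consolidated agent.
// The arbitrary thread_id lets the MemorySaver checkpointer persist the conversation.
import { HumanMessage } from "@langchain/core/messages";

const config3 = { configurable: { thread_id: "example-thread-1" } };

for await (const step of await agentExecutor3.stream(
  { messages: [new HumanMessage("What is Task Decomposition?")] },
  config3
)) {
  console.log(step);
  console.log("----");
}
```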
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" }, - "nbformat": 4, - "nbformat_minor": 2 + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/core_docs/docs/versions/migrating_memory/chat_history.ipynb b/docs/core_docs/docs/versions/migrating_memory/chat_history.ipynb new file mode 100644 index 000000000000..927aec36ad0c --- /dev/null +++ b/docs/core_docs/docs/versions/migrating_memory/chat_history.ipynb @@ -0,0 +1,268 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c298a5c9-b9af-481d-9eba-cbd65f987a8a", + "metadata": {}, + "source": [ + "# How to use BaseChatMessageHistory with LangGraph\n", + "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "\n", + "- [Chat History](/docs/concepts/#chat-history)\n", + "- [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html)\n", + "- [LangGraph](https://langchain-ai.github.io/langgraphjs/concepts/high_level/)\n", + "- [Memory](https://langchain-ai.github.io/langgraphjs/concepts/agentic_concepts/#memory)\n", + "\n", + ":::\n", + "\n", + "We recommend that new LangChain applications take advantage of the [built-in LangGraph persistence](https://langchain-ai.github.io/langgraph/concepts/persistence/) to implement memory.\n", + "\n", + "In some situations, users may need to keep using an existing persistence solution for chat message history.\n", + "\n", + "Here, we will show how to use [LangChain chat message histories](/docs/integrations/memory/) (implementations of [BaseChatMessageHistory](https://api.js.langchain.com/classes/_langchain_core.chat_history.BaseChatMessageHistory.html)) with LangGraph." + ] + }, + { + "cell_type": "markdown", + "id": "548bc988-167b-43f1-860a-d247e28b2b42", + "metadata": {}, + "source": [ + "## Set up\n", + "\n", + "```typescript\n", + "process.env.ANTHROPIC_API_KEY = 'YOUR_API_KEY'\n", + "```\n", + "\n", + "```{=mdx}\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\"\n", + "\n", + "\n", + " @langchain/core @langchain/langgraph @langchain/anthropic\n", + "\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c5e08659-b68c-48f2-8b33-e79b0c6999e1", + "metadata": {}, + "source": [ + "## ChatMessageHistory\n", + "\n", + "A message history needs to be parameterized by a conversation ID or maybe by the 2-tuple of (user ID, conversation ID).\n", + "\n", + "Many of the [LangChain chat message histories](/docs/integrations/memory/) will have either a `sessionId` or some `namespace` to allow keeping track of different conversations. Please refer to the specific implementations to check how it is parameterized.\n", + "\n", + "The built-in `InMemoryChatMessageHistory` does not contain such a parameterization, so we'll create a dictionary to keep track of the message histories."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "28049308-2543-48e6-90d0-37a88951a637", + "metadata": {}, + "outputs": [], + "source": [ + "import { InMemoryChatMessageHistory } from \"@langchain/core/chat_history\";\n", + "\n", + "const chatsBySessionId: Record = {}\n", + "\n", + "const getChatHistory = (sessionId: string) => {\n", + " let chatHistory: InMemoryChatMessageHistory | undefined = chatsBySessionId[sessionId]\n", + " if (!chatHistory) {\n", + " chatHistory = new InMemoryChatMessageHistory()\n", + " chatsBySessionId[sessionId] = chatHistory\n", + " }\n", + " return chatHistory\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "94c53ce3-4212-41e6-8ad3-f0ab5df6130f", + "metadata": {}, + "source": [ + "## Use with LangGraph\n", + "\n", + "Next, we'll set up a basic chat bot using LangGraph. If you're not familiar with LangGraph, you should look at the following [Quick Start Tutorial](https://langchain-ai.github.io/langgraphjs/tutorials/quickstart/).\n", + "\n", + "We'll create a [LangGraph node](https://langchain-ai.github.io/langgraphjs/concepts/low_level/#nodes) for the chat model, and manually manage the conversation history, taking into account the conversation ID passed as part of the RunnableConfig.\n", + "\n", + "The conversation ID can be passed as either part of the RunnableConfig (as we'll do here), or as part of the [graph state](https://langchain-ai.github.io/langgraphjs/concepts/low_level/#state)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d818e23f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hi! I'm bob\n", + "Hello Bob! It's nice to meet you. How can I assist you today?\n", + "what was my name?\n", + "You said your name is Bob.\n" + ] + } + ], + "source": [ + "import { v4 as uuidv4 } from \"uuid\";\n", + "import { ChatAnthropic } from \"@langchain/anthropic\";\n", + "import { StateGraph, MessagesAnnotation, END, START } from \"@langchain/langgraph\";\n", + "import { HumanMessage } from \"@langchain/core/messages\";\n", + "import { RunnableConfig } from \"@langchain/core/runnables\";\n", + "\n", + "// Define a chat model\n", + "const model = new ChatAnthropic({ modelName: \"claude-3-haiku-20240307\" });\n", + "\n", + "// Define the function that calls the model\n", + "const callModel = async (\n", + " state: typeof MessagesAnnotation.State,\n", + " config: RunnableConfig\n", + "): Promise> => {\n", + " if (!config.configurable?.sessionId) {\n", + " throw new Error(\n", + " \"Make sure that the config includes the following information: {'configurable': {'sessionId': 'some_value'}}\"\n", + " );\n", + " }\n", + "\n", + " const chatHistory = getChatHistory(config.configurable.sessionId as string);\n", + "\n", + " let messages = [...(await chatHistory.getMessages()), ...state.messages];\n", + "\n", + " if (state.messages.length === 1) {\n", + " // First message, ensure it's in the chat history\n", + " await chatHistory.addMessage(state.messages[0]);\n", + " }\n", + "\n", + " const aiMessage = await model.invoke(messages);\n", + "\n", + " // Update the chat history\n", + " await chatHistory.addMessage(aiMessage);\n", + "\n", + " return { messages: [aiMessage] };\n", + "};\n", + "\n", + "// Define a new graph\n", + "const workflow = new StateGraph(MessagesAnnotation)\n", + " .addNode(\"model\", callModel)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END);\n", + "\n", + "const app = workflow.compile();\n", + "\n", + "// Create 
a unique session ID to identify the conversation\n", + "const sessionId = uuidv4();\n", + "const config = { configurable: { sessionId }, streamMode: \"values\" as const };\n", + "\n", + "const inputMessage = new HumanMessage(\"hi! I'm bob\");\n", + "\n", + "for await (const event of await app.stream({ messages: [inputMessage] }, config)) {\n", + " const lastMessage = event.messages[event.messages.length - 1];\n", + " console.log(lastMessage.content);\n", + "}\n", + "\n", + "// Here, let's confirm that the AI remembers our name!\n", + "const followUpMessage = new HumanMessage(\"what was my name?\");\n", + "\n", + "for await (const event of await app.stream({ messages: [followUpMessage] }, config)) {\n", + " const lastMessage = event.messages[event.messages.length - 1];\n", + " console.log(lastMessage.content);\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "da0536dd-9a0b-49e3-b0b6-e8c7abf3b1f9", + "metadata": {}, + "source": [ + "## Using With RunnableWithMessageHistory\n", + "\n", + "This how-to guide used the `messages` and `addMessages` interface of `BaseChatMessageHistory` directly. \n", + "\n", + "Alternatively, you can use [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html), as [LCEL](/docs/concepts/#langchain-expression-language-lcel/) can be used inside any [LangGraph node](https://langchain-ai.github.io/langgraphjs/concepts/low_level/#nodes).\n", + "\n", + "To do that replace the following code:\n", + "\n", + "```typescript\n", + "const callModel = async (\n", + " state: typeof MessagesAnnotation.State,\n", + " config: RunnableConfig\n", + "): Promise> => {\n", + " // highlight-start\n", + " if (!config.configurable?.sessionId) {\n", + " throw new Error(\n", + " \"Make sure that the config includes the following information: {'configurable': {'sessionId': 'some_value'}}\"\n", + " );\n", + " }\n", + "\n", + " const chatHistory = getChatHistory(config.configurable.sessionId as string);\n", + "\n", + " let messages = [...(await chatHistory.getMessages()), ...state.messages];\n", + "\n", + " if (state.messages.length === 1) {\n", + " // First message, ensure it's in the chat history\n", + " await chatHistory.addMessage(state.messages[0]);\n", + " }\n", + "\n", + " const aiMessage = await model.invoke(messages);\n", + "\n", + " // Update the chat history\n", + " await chatHistory.addMessage(aiMessage);\n", + " // highlight-end\n", + " return { messages: [aiMessage] };\n", + "};\n", + "```\n", + "\n", + "With the corresponding instance of `RunnableWithMessageHistory` defined in your current application.\n", + "\n", + "```typescript\n", + "const runnable = new RunnableWithMessageHistory({\n", + " // ... 
configuration from existing code\n", + "});\n", + "\n", + "const callModel = async (\n", + " state: typeof MessagesAnnotation.State,\n", + " config: RunnableConfig\n", + "): Promise> => {\n", + " // RunnableWithMessageHistory takes care of reading the message history\n", + " // and updating it with the new human message and AI response.\n", + " const aiMessage = await runnable.invoke(state.messages, config);\n", + " return {\n", + " messages: [aiMessage]\n", + " };\n", + "};\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/versions/migrating_memory/conversation_buffer_window_memory.ipynb b/docs/core_docs/docs/versions/migrating_memory/conversation_buffer_window_memory.ipynb new file mode 100644 index 000000000000..719cf9d1022e --- /dev/null +++ b/docs/core_docs/docs/versions/migrating_memory/conversation_buffer_window_memory.ipynb @@ -0,0 +1,643 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce8457ed-c0b1-4a74-abbd-9d3d2211270f", + "metadata": {}, + "source": [ + "# Migrating off ConversationTokenBufferMemory\n", + "\n", + "Follow this guide if you're trying to migrate off one of the old memory classes listed below:\n", + "\n", + "\n", + "| Memory Type | Description |\n", + "|----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "| `ConversationTokenBufferMemory` | Keeps only the most recent messages in the conversation under the constraint that the total number of tokens in the conversation does not exceed a certain limit. |\n", + "\n", + "`ConversationTokenBufferMemory` applies additional processing on top of the raw conversation history to trim the conversation history to a size that fits inside the context window of a chat model. \n", + "\n", + "This processing functionality can be accomplished using LangChain's built-in [trimMessages](https://api.js.langchain.com/functions/_langchain_core.messages.trimMessages.html) function." 
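The `// ... configuration from existing code` placeholder above is intentionally left to your application's existing setup. As a purely illustrative sketch (one possible minimal configuration, not the only valid one), a messages-in / message-out wrapper that reuses the `model` and the `getChatHistory` helper defined earlier in this guide might look like this:

```typescript
import { RunnableWithMessageHistory } from "@langchain/core/runnables";

// Illustrative only: wrap the chat model so that RunnableWithMessageHistory
// loads prior messages for a session before each call and appends the new
// human message and AI response afterwards.
const runnable = new RunnableWithMessageHistory({
  runnable: model,
  // Reuse the getChatHistory helper defined earlier in this guide.
  getMessageHistory: (sessionId: string) => getChatHistory(sessionId),
});

// The wrapper is then invoked with the session id in the config, e.g.:
// await runnable.invoke([new HumanMessage("hi!")], { configurable: { sessionId } });
```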
+ ] + }, + { + "cell_type": "markdown", + "id": "79935247-acc7-4a05-a387-5d72c9c8c8cb", + "metadata": {}, + "source": [ + "```{=mdx}\n", + ":::important\n", + "\n", + "We’ll begin by exploring a straightforward method that involves applying processing logic to the entire conversation history.\n", + "\n", + "While this approach is easy to implement, it has a downside: as the conversation grows, so does the latency, since the logic is re-applied to all previous exchanges in the conversation at each turn.\n", + "\n", + "More advanced strategies focus on incrementally updating the conversation history to avoid redundant processing.\n", + "\n", + "For instance, the LangGraph [how-to guide on summarization](https://langchain-ai.github.io/langgraphjs/how-tos/add-summary-conversation-history/) demonstrates\n", + "how to maintain a running summary of the conversation while discarding older messages, ensuring they aren't re-processed during later turns.\n", + ":::\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "d07f9459-9fb6-4942-99c9-64558aedd7d4", + "metadata": {}, + "source": [ + "## Set up\n", + "\n", + "### Dependencies\n", + "\n", + "```{=mdx}\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\"\n", + "\n", + "\n", + " @langchain/openai @langchain/core zod\n", + "\n", + "```\n", + "\n", + "### Environment variables\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_OPENAI_API_KEY\";\n", + "```\n", + "\n", + "```{=mdx}\n", + "
\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "7ce2d951", + "metadata": {}, + "source": [ + "## Reimplementing ConversationTokenBufferMemory logic\n", + "\n", + "Here, we'll use `trimMessages` to keeps the system message and the most recent messages in the conversation under the constraint that the total number of tokens in the conversation does not exceed a certain limit." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e1550bee", + "metadata": {}, + "outputs": [], + "source": [ + "import {\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage,\n", + "} from \"@langchain/core/messages\";\n", + "\n", + "const messages = [\n", + " new SystemMessage(\"you're a good assistant, you always respond with a joke.\"),\n", + " new HumanMessage(\"i wonder why it's called langchain\"),\n", + " new AIMessage(\n", + " 'Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!'\n", + " ),\n", + " new HumanMessage(\"and who is harrison chasing anyways\"),\n", + " new AIMessage(\n", + " \"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"\n", + " ),\n", + " new HumanMessage(\"why is 42 always the answer?\"),\n", + " new AIMessage(\n", + " \"Because it's the only number that's constantly right, even when it doesn't add up!\"\n", + " ),\n", + " new HumanMessage(\"What did the cow say?\"),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6442f74b-2c36-48fd-a3d1-c7c5d18c050f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SystemMessage {\n", + " \"content\": \"you're a good assistant, you always respond with a joke.\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + "}\n", + "HumanMessage {\n", + " \"content\": \"and who is harrison chasing anyways\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + "}\n", + "AIMessage {\n", + " \"content\": \"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + "}\n", + "HumanMessage {\n", + " \"content\": \"why is 42 always the answer?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + "}\n", + "AIMessage {\n", + " \"content\": \"Because it's the only number that's constantly right, even when it doesn't add up!\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {},\n", + " \"tool_calls\": [],\n", + " \"invalid_tool_calls\": []\n", + "}\n", + "HumanMessage {\n", + " \"content\": \"What did the cow say?\",\n", + " \"additional_kwargs\": {},\n", + " \"response_metadata\": {}\n", + "}\n" + ] + } + ], + "source": [ + "import { trimMessages } from \"@langchain/core/messages\";\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "\n", + "const selectedMessages = await trimMessages(\n", + " messages,\n", + " {\n", + " // Please see API reference for trimMessages for other ways to specify a token counter.\n", + " tokenCounter: new ChatOpenAI({ model: \"gpt-4o\" }),\n", + " maxTokens: 80, // <-- token limit\n", + " // The startOn is specified\n", + " // to make sure we do not generate a sequence where\n", + " // a ToolMessage that contains the result of a tool invocation\n", + " // appears before the AIMessage that requested a tool invocation\n", + " // as this will cause some chat 
models to raise an error.\n", + " startOn: \"human\",\n", + " strategy: \"last\",\n", + " includeSystem: true, // <-- Keep the system message\n", + " }\n", + ")\n", + "\n", + "for (const msg of selectedMessages) {\n", + " console.log(msg);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0f05d272-2d22-44b7-9fa6-e617a48584b4", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "
\n", + "```\n", + "\n", + "## Modern usage with LangGraph\n", + "\n", + "The example below shows how to use LangGraph to add simple conversation pre-processing logic.\n", + "\n", + "```{=mdx}\n", + ":::note\n", + "\n", + "If you want to avoid running the computation on the entire conversation history each time, you can follow\n", + "the [how-to guide on summarization](https://langchain-ai.github.io/langgraphjs/how-tos/add-summary-conversation-history/) that demonstrates\n", + "how to discard older messages, ensuring they aren't re-processed during later turns.\n", + "\n", + ":::\n", + "```\n", + "\n", + "```{=mdx}\n", + "
\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "05d360e0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hi! I'm bob\n", + "Hello, Bob! How can I assist you today?\n", + "what was my name?\n", + "You mentioned that your name is Bob. How can I help you today?\n" + ] + } + ], + "source": [ + "import { v4 as uuidv4 } from 'uuid';\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "import { StateGraph, MessagesAnnotation, END, START, MemorySaver } from \"@langchain/langgraph\";\n", + "import { trimMessages } from \"@langchain/core/messages\";\n", + "\n", + "// Define a chat model\n", + "const model = new ChatOpenAI({ model: \"gpt-4o\" });\n", + "\n", + "// Define the function that calls the model\n", + "const callModel = async (state: typeof MessagesAnnotation.State): Promise> => {\n", + " // highlight-start\n", + " const selectedMessages = await trimMessages(\n", + " state.messages,\n", + " {\n", + " tokenCounter: (messages) => messages.length, // Simple message count instead of token count\n", + " maxTokens: 5, // Allow up to 5 messages\n", + " strategy: \"last\",\n", + " startOn: \"human\",\n", + " includeSystem: true,\n", + " allowPartial: false,\n", + " }\n", + " );\n", + " // highlight-end\n", + "\n", + " const response = await model.invoke(selectedMessages);\n", + "\n", + " // With LangGraph, we're able to return a single message, and LangGraph will concatenate\n", + " // it to the existing list\n", + " return { messages: [response] };\n", + "};\n", + "\n", + "\n", + "// Define a new graph\n", + "const workflow = new StateGraph(MessagesAnnotation)\n", + "// Define the two nodes we will cycle between\n", + " .addNode(\"model\", callModel)\n", + " .addEdge(START, \"model\")\n", + " .addEdge(\"model\", END)\n", + "\n", + "const app = workflow.compile({\n", + " // Adding memory is straightforward in LangGraph!\n", + " // Just pass a checkpointer to the compile method.\n", + " checkpointer: new MemorySaver()\n", + "});\n", + "\n", + "// The thread id is a unique key that identifies this particular conversation\n", + "// ---\n", + "// NOTE: this must be `thread_id` and not `threadId` as the LangGraph internals expect `thread_id`\n", + "// ---\n", + "const thread_id = uuidv4();\n", + "const config = { configurable: { thread_id }, streamMode: \"values\" as const };\n", + "\n", + "const inputMessage = {\n", + " role: \"user\",\n", + " content: \"hi! I'm bob\",\n", + "}\n", + "for await (const event of await app.stream({ messages: [inputMessage] }, config)) {\n", + " const lastMessage = event.messages[event.messages.length - 1];\n", + " console.log(lastMessage.content);\n", + "}\n", + "\n", + "// Here, let's confirm that the AI remembers our name!\n", + "const followUpMessage = {\n", + " role: \"user\",\n", + " content: \"what was my name?\",\n", + "}\n", + "\n", + "// ---\n", + "// NOTE: You must pass the same thread id to continue the conversation\n", + "// we do that here by passing the same `config` object to the `.stream` call.\n", + "// ---\n", + "for await (const event of await app.stream({ messages: [followUpMessage] }, config)) {\n", + " const lastMessage = event.messages[event.messages.length - 1];\n", + " console.log(lastMessage.content);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "84229e2e-a578-4b21-840a-814223406402", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "
\n", + "```\n", + "\n", + "## Usage with a pre-built langgraph agent\n", + "\n", + "This example shows usage of an Agent Executor with a pre-built agent constructed using the [createReactAgent](https://langchain-ai.github.io/langgraphjs/reference/functions/langgraph_prebuilt.createReactAgent.html) function.\n", + "\n", + "If you are using one of the [old LangChain pre-built agents](https://js.langchain.com/v0.1/docs/modules/agents/agent_types/), you should be able\n", + "to replace that code with the new [LangGraph pre-built agent](https://langchain-ai.github.io/langgraphjs/how-tos/create-react-agent/) which leverages\n", + "native tool calling capabilities of chat models and will likely work better out of the box.\n", + "\n", + "```{=mdx}\n", + "
\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9e54ccdc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hi! I'm bob. What is my age?\n", + "\n", + "42 years old\n", + "Hi Bob! You are 42 years old.\n", + "do you remember my name?\n", + "Yes, your name is Bob! If there's anything else you'd like to know or discuss, feel free to ask.\n" + ] + } + ], + "source": [ + "import { z } from \"zod\";\n", + "import { v4 as uuidv4 } from 'uuid';\n", + "import { BaseMessage, trimMessages } from \"@langchain/core/messages\";\n", + "import { tool } from \"@langchain/core/tools\";\n", + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "import { MemorySaver } from \"@langchain/langgraph\";\n", + "import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n", + "\n", + "const getUserAge = tool(\n", + " (name: string): string => {\n", + " // This is a placeholder for the actual implementation\n", + " if (name.toLowerCase().includes(\"bob\")) {\n", + " return \"42 years old\";\n", + " }\n", + " return \"41 years old\";\n", + " },\n", + " {\n", + " name: \"get_user_age\",\n", + " description: \"Use this tool to find the user's age.\",\n", + " schema: z.string().describe(\"the name of the user\"),\n", + " }\n", + ");\n", + "\n", + "const memory = new MemorySaver();\n", + "const model2 = new ChatOpenAI({ model: \"gpt-4o\" });\n", + "\n", + "// highlight-start\n", + "const stateModifier = async (messages: BaseMessage[]): Promise => {\n", + " // We're using the message processor defined above.\n", + " return trimMessages(\n", + " messages,\n", + " {\n", + " tokenCounter: (msgs) => msgs.length, // <-- .length will simply count the number of messages rather than tokens\n", + " maxTokens: 5, // <-- allow up to 5 messages.\n", + " strategy: \"last\",\n", + " // The startOn is specified\n", + " // to make sure we do not generate a sequence where\n", + " // a ToolMessage that contains the result of a tool invocation\n", + " // appears before the AIMessage that requested a tool invocation\n", + " // as this will cause some chat models to raise an error.\n", + " startOn: \"human\",\n", + " includeSystem: true, // <-- Keep the system message\n", + " allowPartial: false,\n", + " }\n", + " );\n", + "};\n", + "// highlight-end\n", + "\n", + "const app2 = createReactAgent({\n", + " llm: model2,\n", + " tools: [getUserAge],\n", + " checkpointSaver: memory,\n", + " // highlight-next-line\n", + " messageModifier: stateModifier,\n", + "});\n", + "\n", + "// The thread id is a unique key that identifies\n", + "// this particular conversation.\n", + "// We'll just generate a random uuid here.\n", + "const threadId2 = uuidv4();\n", + "const config2 = { configurable: { thread_id: threadId2 }, streamMode: \"values\" as const };\n", + "\n", + "// Tell the AI that our name is Bob, and ask it to use a tool to confirm\n", + "// that it's capable of working like an agent.\n", + "const inputMessage2 = {\n", + " role: \"user\",\n", + " content: \"hi! I'm bob. 
What is my age?\",\n", + "}\n", + "\n", + "for await (const event of await app2.stream({ messages: [inputMessage2] }, config2)) {\n", + " const lastMessage = event.messages[event.messages.length - 1];\n", + " console.log(lastMessage.content);\n", + "}\n", + "\n", + "// Confirm that the chat bot has access to previous conversation\n", + "// and can respond to the user saying that the user's name is Bob.\n", + "const followUpMessage2 = {\n", + " role: \"user\",\n", + " content: \"do you remember my name?\",\n", + "};\n", + "\n", + "for await (const event of await app2.stream({ messages: [followUpMessage2] }, config2)) {\n", + " const lastMessage = event.messages[event.messages.length - 1];\n", + " console.log(lastMessage.content);\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f4d16e09-1d90-4153-8576-6d3996cb5a6c", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "
\n", + "```\n", + "\n", + "## LCEL: Add a preprocessing step\n", + "\n", + "The simplest way to add complex conversation management is by introducing a pre-processing step in front of the chat model and passing the full conversation history to that pre-processing step.\n", + "\n", + "This approach is conceptually simple and will work in many situations; for example, if you're using a [RunnableWithMessageHistory](/docs/how_to/message_history/), wrap the chat model together with the pre-processor instead of wrapping the chat model on its own.\n", + "\n", + "The obvious downside of this approach is that latency starts to increase as the conversation history grows, for two reasons:\n", + "\n", + "1. As the conversation gets longer, more data may need to be fetched from whatever store you're using to store the conversation history (if not storing it in memory).\n", + "2. The pre-processing logic will end up doing a lot of redundant computation, repeating computation from previous steps of the conversation.\n", + "\n", + "```{=mdx}\n", + ":::caution\n", + "\n", + "If you want to use a chat model's tool calling capabilities, remember to bind the tools to the model before adding the history pre-processing step to it!\n", + "\n", + ":::\n", + "```\n", + "\n", + "```{=mdx}\n", + "
\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a1c8adf2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AIMessage {\n", + " \"id\": \"chatcmpl-AB6uzWscxviYlbADFeDlnwIH82Fzt\",\n", + " \"content\": \"\",\n", + " \"additional_kwargs\": {\n", + " \"tool_calls\": [\n", + " {\n", + " \"id\": \"call_TghBL9dzqXFMCt0zj0VYMjfp\",\n", + " \"type\": \"function\",\n", + " \"function\": \"[Object]\"\n", + " }\n", + " ]\n", + " },\n", + " \"response_metadata\": {\n", + " \"tokenUsage\": {\n", + " \"completionTokens\": 16,\n", + " \"promptTokens\": 95,\n", + " \"totalTokens\": 111\n", + " },\n", + " \"finish_reason\": \"tool_calls\",\n", + " \"system_fingerprint\": \"fp_a5d11b2ef2\"\n", + " },\n", + " \"tool_calls\": [\n", + " {\n", + " \"name\": \"what_did_the_cow_say\",\n", + " \"args\": {},\n", + " \"type\": \"tool_call\",\n", + " \"id\": \"call_TghBL9dzqXFMCt0zj0VYMjfp\"\n", + " }\n", + " ],\n", + " \"invalid_tool_calls\": [],\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 95,\n", + " \"output_tokens\": 16,\n", + " \"total_tokens\": 111\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "import { ChatOpenAI } from \"@langchain/openai\";\n", + "import { AIMessage, HumanMessage, SystemMessage, BaseMessage, trimMessages } from \"@langchain/core/messages\";\n", + "import { tool } from \"@langchain/core/tools\";\n", + "import { z } from \"zod\";\n", + "\n", + "const model3 = new ChatOpenAI({ model: \"gpt-4o\" });\n", + "\n", + "const whatDidTheCowSay = tool(\n", + " (): string => {\n", + " return \"foo\";\n", + " },\n", + " {\n", + " name: \"what_did_the_cow_say\",\n", + " description: \"Check to see what the cow said.\",\n", + " schema: z.object({}),\n", + " }\n", + ");\n", + "\n", + "// highlight-start\n", + "const messageProcessor = trimMessages(\n", + " {\n", + " tokenCounter: (msgs) => msgs.length, // <-- .length will simply count the number of messages rather than tokens\n", + " maxTokens: 5, // <-- allow up to 5 messages.\n", + " strategy: \"last\",\n", + " // The startOn is specified\n", + " // to make sure we do not generate a sequence where\n", + " // a ToolMessage that contains the result of a tool invocation\n", + " // appears before the AIMessage that requested a tool invocation\n", + " // as this will cause some chat models to raise an error.\n", + " startOn: \"human\",\n", + " includeSystem: true, // <-- Keep the system message\n", + " allowPartial: false,\n", + " }\n", + ");\n", + "// highlight-end\n", + "\n", + "// Note that we bind tools to the model first!\n", + "const modelWithTools = model3.bindTools([whatDidTheCowSay]);\n", + "\n", + "// highlight-next-line\n", + "const modelWithPreprocessor = messageProcessor.pipe(modelWithTools);\n", + "\n", + "const fullHistory = [\n", + " new SystemMessage(\"you're a good assistant, you always respond with a joke.\"),\n", + " new HumanMessage(\"i wonder why it's called langchain\"),\n", + " new AIMessage('Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!'),\n", + " new HumanMessage(\"and who is harrison chasing anyways\"),\n", + " new AIMessage(\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n", + " new HumanMessage(\"why is 42 always the answer?\"),\n", + " new AIMessage(\"Because it's the only number that's constantly right, even when it doesn't add up!\"),\n", + " new HumanMessage(\"What did the cow say?\"),\n", + "];\n", + "\n", + "// We pass 
it explicitly to the modelWithPreprocessor for illustrative purposes.\n", + "// If you're using `RunnableWithMessageHistory` the history will be automatically\n", + "// read from the source that you configure.\n", + "const result = await modelWithPreprocessor.invoke(fullHistory);\n", + "console.log(result);" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5da7225a-5e94-4f53-bb0d-86b6b528d150", + "metadata": {}, + "source": [ + "```{=mdx}\n", + "
\n", + "```\n", + "\n", + "If you need to implement more efficient logic and want to use `RunnableWithMessageHistory` for now the way to achieve this\n", + "is to subclass from [BaseChatMessageHistory](https://api.js.langchain.com/classes/_langchain_core.chat_history.BaseChatMessageHistory.html) and\n", + "define appropriate logic for `addMessages` (that doesn't simply append the history, but instead re-writes it).\n", + "\n", + "Unless you have a good reason to implement this solution, you should instead use LangGraph." + ] + }, + { + "cell_type": "markdown", + "id": "b2717810", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "Explore persistence with LangGraph:\n", + "\n", + "* [LangGraph quickstart tutorial](https://langchain-ai.github.io/langgraphjs/tutorials/quickstart/)\n", + "* [How to add persistence (\"memory\") to your graph](https://langchain-ai.github.io/langgraphjs/how-tos/persistence/)\n", + "* [How to manage conversation history](https://langchain-ai.github.io/langgraphjs/how-tos/manage-conversation-history/)\n", + "* [How to add summary of the conversation history](https://langchain-ai.github.io/langgraphjs/how-tos/add-summary-conversation-history/)\n", + "\n", + "Add persistence with simple LCEL (favor LangGraph for more complex use cases):\n", + "\n", + "* [How to add message history](/docs/how_to/message_history/)\n", + "\n", + "Working with message history:\n", + "\n", + "* [How to trim messages](/docs/how_to/trim_messages)\n", + "* [How to filter messages](/docs/how_to/filter_messages/)\n", + "* [How to merge message runs](/docs/how_to/merge_message_runs/)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/versions/migrating_memory/conversation_summary_memory.ipynb b/docs/core_docs/docs/versions/migrating_memory/conversation_summary_memory.ipynb new file mode 100644 index 000000000000..50b2fbea4f96 --- /dev/null +++ b/docs/core_docs/docs/versions/migrating_memory/conversation_summary_memory.ipynb @@ -0,0 +1,45 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce8457ed-c0b1-4a74-abbd-9d3d2211270f", + "metadata": {}, + "source": [ + "# Migrating off ConversationSummaryMemory or ConversationSummaryBufferMemory\n", + "\n", + "Follow this guide if you're trying to migrate off one of the old memory classes listed below:\n", + "\n", + "\n", + "| Memory Type | Description |\n", + "|---------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "| `ConversationSummaryMemory` | Continually summarizes the conversation history. The summary is updated after each conversation turn. The abstraction returns the summary of the conversation history. |\n", + "| `ConversationSummaryBufferMemory` | Provides a running summary of the conversation together with the most recent messages in the conversation under the constraint that the total number of tokens in the conversation does not exceed a certain limit. 
|\n", + "\n", + "Please follow the following [how-to guide on summarization](https://langchain-ai.github.io/langgraphjs/how-tos/add-summary-conversation-history/) in LangGraph. \n", + "\n", + "This guide shows how to maintain a running summary of the conversation while discarding older messages, ensuring they aren't re-processed during later turns." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/versions/migrating_memory/index.mdx b/docs/core_docs/docs/versions/migrating_memory/index.mdx new file mode 100644 index 000000000000..837e7f03c544 --- /dev/null +++ b/docs/core_docs/docs/versions/migrating_memory/index.mdx @@ -0,0 +1,139 @@ +--- +sidebar_position: 1 +--- + +# How to migrate to LangGraph memory + +As of the v0.3 release of LangChain, we recommend that LangChain users take advantage of LangGraph persistence to incorporate `memory` into their LangChain application. + +- Users that rely on `RunnableWithMessageHistory` or `BaseChatMessageHistory` do **not** need to make any changes, but are encouraged to consider using LangGraph for more complex use cases. +- Users that rely on deprecated memory abstractions from LangChain 0.0.x should follow this guide to upgrade to the new LangGraph persistence feature in LangChain 0.3.x. + +## Why use LangGraph for memory? + +The main advantages of persistence in LangGraph are: + +- Built-in support for multiple users and conversations, which is a typical requirement for real-world conversational AI applications. +- Ability to save and resume complex conversations at any point. This helps with: + - Error recovery + - Allowing human intervention in AI workflows + - Exploring different conversation paths ("time travel") +- Full compatibility with both traditional [language models](/docs/concepts/#llms) and modern [chat models](/docs/concepts/#chat-models). Early memory implementations in LangChain weren't designed for newer chat model APIs, causing issues with features like tool-calling. LangGraph memory can persist any custom state. +- Highly customizable, allowing you to fully control how memory works and use different storage backends. + +## Evolution of memory in LangChain + +The concept of memory has evolved significantly in LangChain since its initial release. + +### LangChain 0.0.x memory + +Broadly speaking, LangChain 0.0.x memory was used to handle three main use cases: + +| Use Case | Example | +| ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| Managing conversation history | Keep only the last `n` turns of the conversation between the user and the AI. | +| Extraction of structured information | Extract structured information from the conversation history, such as a list of facts learned about the user. | +| Composite memory implementations | Combine multiple memory sources, e.g., a list of known facts about the user along with facts learned during a given conversation. 
| + +While the LangChain 0.0.x memory abstractions were useful, they were limited in their capabilities and not well suited for real-world conversational AI applications. These memory abstractions lacked built-in support for multi-user, multi-conversation scenarios, which are essential for practical conversational AI systems. + +Most of these implementations have been officially deprecated in LangChain 0.3.x in favor of LangGraph persistence. + +### RunnableWithMessageHistory and BaseChatMessageHistory + +:::note +Please see [How to use BaseChatMessageHistory with LangGraph](./chat_history), if you would like to use `BaseChatMessageHistory` (with or without `RunnableWithMessageHistory`) in LangGraph. +::: + +As of LangChain v0.1, we started recommending that users rely primarily on [BaseChatMessageHistory](https://api.js.langchain.com/classes/_langchain_core.chat_history.BaseChatMessageHistory.html). `BaseChatMessageHistory` serves +as a simple persistence for storing and retrieving messages in a conversation. + +At that time, the only option for orchestrating LangChain chains was via [LCEL](/docs/how_to/#langchain-expression-language-lcel). To incorporate memory with `LCEL`, users had to use the [RunnableWithMessageHistory](https://api.js.langchain.com/classes/_langchain_core.runnables.RunnableWithMessageHistory.html) interface. While sufficient for basic chat applications, many users found the API unintuitive and challenging to use. + +As of LangChain v0.3, we recommend that **new** code takes advantage of LangGraph for both orchestration and persistence: + +- Orchestration: In LangGraph, users define [graphs](https://langchain-ai.github.io/langgraphjs/concepts/low_level/) that specify the flow of the application. This allows users to keep using `LCEL` within individual nodes when `LCEL` is needed, while making it easy to define complex orchestration logic that is more readable and maintainable. +- Persistence: Users can rely on LangGraph's persistence to store and retrieve data. LangGraph persistence is extremely flexible and can support a much wider range of use cases than the `RunnableWithMessageHistory` interface. + +:::important +If you have been using `RunnableWithMessageHistory` or `BaseChatMessageHistory`, you do not need to make any changes. We do not plan on deprecating either functionality in the near future. This functionality is sufficient for simple chat applications and any code that uses `RunnableWithMessageHistory` will continue to work as expected. +::: + +## Migrations + +:::info Prerequisites + +These guides assume some familiarity with the following concepts: + +- [LangGraph](https://langchain-ai.github.io/langgraphjs/) +- [v0.0.x Memory](https://js.langchain.com/v0.1/docs/modules/memory/) +- [How to add persistence ("memory") to your graph](https://langchain-ai.github.io/langgraphjs/how-tos/persistence/) + ::: + +### 1. Managing conversation history + +The goal of managing conversation history is to store and retrieve the history in a way that is optimal for a chat model to use. + +Often this involves trimming and / or summarizing the conversation history to keep the most relevant parts of the conversation while having the conversation fit inside the context window of the chat model. 
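To make the summarizing half of this concrete, here is a rough sketch of a LangGraph node that folds older messages into a summary, loosely following the summarization how-to linked from these guides. The message-count threshold, the number of retained messages, and the summarization prompt are arbitrary assumptions for illustration; the linked guide stores the summary in a dedicated state field rather than appending it back into the message list as done here for brevity.

```typescript
import { ChatOpenAI } from "@langchain/openai";
import { HumanMessage, RemoveMessage } from "@langchain/core/messages";
import { MessagesAnnotation } from "@langchain/langgraph";

const summaryModel = new ChatOpenAI({ model: "gpt-4o" });

// Sketch of a summarization node: once the history grows past an arbitrary
// threshold, fold the older messages into a single summary message and
// delete them from state so they are not re-processed on later turns.
const summarizeConversation = async (state: typeof MessagesAnnotation.State) => {
  if (state.messages.length <= 6) {
    return {}; // history is still short enough; nothing to do
  }
  const olderMessages = state.messages.slice(0, -2); // keep the two most recent messages
  const summary = await summaryModel.invoke([
    ...olderMessages,
    new HumanMessage("Distill the conversation above into a short summary."),
  ]);
  return {
    messages: [
      // RemoveMessage entries tell the built-in messages reducer to drop these ids.
      ...olderMessages.map((m) => new RemoveMessage({ id: m.id as string })),
      summary,
    ],
  };
};
```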
+ +Memory classes that fall into this category include: + +| Memory Type | How to Migrate | Description | +| --------------------------------- | :----------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ConversationTokenBufferMemory` | [Link to Migration Guide](conversation_buffer_window_memory) | Keeps only the most recent messages in the conversation under the constraint that the total number of tokens in the conversation does not exceed a certain limit. | +| `ConversationSummaryMemory` | [Link to Migration Guide](conversation_summary_memory) | Continually summarizes the conversation history. The summary is updated after each conversation turn. The abstraction returns the summary of the conversation history. | +| `ConversationSummaryBufferMemory` | [Link to Migration Guide](conversation_summary_memory) | Provides a running summary of the conversation together with the most recent messages in the conversation under the constraint that the total number of tokens in the conversation does not exceed a certain limit. | + +### 2. Extraction of structured information from the conversation history + +Memory classes that fall into this category include: + +| Memory Type | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `BaseEntityStore` | An abstract interface that resembles a key-value store. It was used for storing structured information learned during the conversation. The information had to be represented as an object of key-value pairs. | + +And specific backend implementations of abstractions: + +| Memory Type | Description | +| --------------------- | ---------------------------------------------------------------------------------------------------------- | +| `InMemoryEntityStore` | An implementation of `BaseEntityStore` that stores the information in the literal computer memory (RAM). | + +These abstractions have not received much development since their initial release because, to be useful, they typically require significant +specialization for a particular application. As a result, they are not as widely used as the conversation history management abstractions. + +For this reason, there are no migration guides for these abstractions. If you're struggling to migrate an application +that relies on these abstractions, please open an issue on the LangChain GitHub repository, explain your use case, and we'll try to provide more guidance on how to migrate these abstractions. + +The general strategy for extracting structured information is to use a chat model with tool-calling capabilities to extract it from the conversation history. +The extracted information can then be saved into an appropriate data structure (e.g., an object), and information from it can be retrieved and added into the prompt as needed.
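As an illustration of that strategy, here is a minimal, hypothetical sketch. It assumes an OpenAI chat model from `@langchain/openai` (the `gpt-4o-mini` model name is just an example) and a `zod` schema passed to `withStructuredOutput`, which relies on the model's tool-calling support; any tool-calling chat model could be substituted, and the extracted object can be stored in whatever per-user state your application keeps.

```typescript
import { z } from "zod";
import { ChatOpenAI } from "@langchain/openai";
import { AIMessage, HumanMessage } from "@langchain/core/messages";

// The shape of the structured information we want to pull out of the history.
const userFactsSchema = z.object({
  facts: z
    .array(z.string())
    .describe("Facts learned about the user during the conversation"),
});

const model = new ChatOpenAI({ model: "gpt-4o-mini" });
// withStructuredOutput drives the extraction through the model's tool-calling support.
const extractor = model.withStructuredOutput(userFactsSchema);

// A hypothetical conversation history.
const history = [
  new HumanMessage("Hi! I'm Mia and I live in Lisbon."),
  new AIMessage("Nice to meet you, Mia!"),
  new HumanMessage("I'm planning a trip to Japan next spring."),
];

const extracted = await extractor.invoke([
  ...history,
  new HumanMessage("List every fact you have learned about me so far."),
]);

// e.g. { facts: ["Her name is Mia", "She lives in Lisbon", ...] }
// Save this in your application's user state and add it back into future prompts as needed.
console.log(extracted.facts);
```

### 3. 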
Implementations that provide composite logic on top of one or more memory implementations + +Memory classes that fall into this category include: + +| Memory Type | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------ | +| `CombinedMemory` | This abstraction accepted a list of `BaseMemory` and fetched relevant memory information from each of them based on the input. | + +These implementations did not seem to be used widely or provide significant value. Users should be able +to re-implement these without too much difficulty in custom code. + +## Related Resources + +Explore persistence with LangGraph: + +- [LangGraph quickstart tutorial](https://langchain-ai.github.io/langgraphjs/tutorials/quickstart/) +- [How to add persistence ("memory") to your graph](https://langchain-ai.github.io/langgraphjs/how-tos/persistence/) +- [How to manage conversation history](https://langchain-ai.github.io/langgraphjs/how-tos/manage-conversation-history/) +- [How to add summary of the conversation history](https://langchain-ai.github.io/langgraphjs/how-tos/add-summary-conversation-history/) + +Add persistence with simple LCEL (favor langgraph for more complex use cases): + +- [How to add message history](/docs/how_to/message_history/) + +Working with message history: + +- [How to trim messages](/docs/how_to/trim_messages) +- [How to filter messages](/docs/how_to/filter_messages/) +- [How to merge message runs](/docs/how_to/merge_message_runs/) diff --git a/docs/core_docs/docs/versions/v0_2/migrating_astream_events.mdx b/docs/core_docs/docs/versions/v0_2/migrating_astream_events.mdx index 023c6ad234bd..8fde2bbfe3de 100644 --- a/docs/core_docs/docs/versions/v0_2/migrating_astream_events.mdx +++ b/docs/core_docs/docs/versions/v0_2/migrating_astream_events.mdx @@ -1,6 +1,6 @@ --- sidebar_position: 2 -sidebar_label: streamEvents v2 +sidebar_label: Migrating to streamEvents v2 --- # Migrating to streamEvents v2 diff --git a/docs/core_docs/sidebars.js b/docs/core_docs/sidebars.js index e4297b057712..50336c478fd8 100644 --- a/docs/core_docs/sidebars.js +++ b/docs/core_docs/sidebars.js @@ -73,9 +73,35 @@ module.exports = { collapsible: false, items: [ { - type: "autogenerated", - dirName: "versions", + type: "doc", + id: "versions/v0_3/index", + label: "v0.3", + }, + { + type: "category", + label: "v0.2", + items: [ + { + type: "autogenerated", + dirName: "versions/v0_2", + }, + ], + }, + { + type: "category", + label: "Migrating from v0.0 memory", + link: { type: "doc", id: "versions/migrating_memory/index" }, + collapsible: false, + collapsed: false, + items: [ + { + type: "autogenerated", + dirName: "versions/migrating_memory", + className: "hidden", + }, + ], }, + "versions/release_policy", ], }, "security", diff --git a/docs/core_docs/static/img/conversational_retrieval_chain.png b/docs/core_docs/static/img/conversational_retrieval_chain.png new file mode 100644 index 000000000000..1130df556af2 Binary files /dev/null and b/docs/core_docs/static/img/conversational_retrieval_chain.png differ