🚀 feat: o1 Tool Calling & reasoning_effort (#5553)
* fix: Update @librechat/agents to version 1.9.98

* feat: o1 tool calling

* fix: Improve error logging in RouteErrorBoundary

* refactor: Move extractContent function to utils and clean up Artifact component

* refactor: optimize reasoning UI post-streaming and deprecate plugins rendering

* feat: reasoning_effort support

* fix: update request content type handling in openapiToFunction to remove default 'application/x-www-form-urlencoded'

* chore: bump v0.7.696 data-provider
danny-avila authored Jan 30, 2025
1 parent 591a019 commit 587d46a
Showing 17 changed files with 1,970 additions and 1,903 deletions.
2 changes: 2 additions & 0 deletions api/app/clients/OpenAIClient.js
@@ -1286,6 +1286,8 @@ ${convo}
     ) {
       delete modelOptions.stream;
       delete modelOptions.stop;
+    } else if (!this.isO1Model && modelOptions.reasoning_effort != null) {
+      delete modelOptions.reasoning_effort;
     }

     let reasoningKey = 'reasoning_content';
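The new guard complements the o1 branch above, which already drops `stream` and `stop`: `reasoning_effort` is now stripped whenever the target is not an o1 model. A minimal sketch of the effect, with the option shape assumed for illustration:

    // Sketch: reasoning_effort only survives for o1 models (shapes assumed).
    interface ModelOptions {
      model: string;
      stream?: boolean;
      stop?: string[];
      reasoning_effort?: 'low' | 'medium' | 'high';
    }

    function sanitizeOptions(modelOptions: ModelOptions, isO1Model: boolean): ModelOptions {
      if (!isO1Model && modelOptions.reasoning_effort != null) {
        // Non-reasoning models would reject this parameter.
        delete modelOptions.reasoning_effort;
      }
      return modelOptions;
    }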
2 changes: 1 addition & 1 deletion api/package.json
@@ -44,7 +44,7 @@
     "@langchain/google-genai": "^0.1.7",
     "@langchain/google-vertexai": "^0.1.8",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^1.9.97",
+    "@librechat/agents": "^1.9.98",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "^1.7.7",
     "bcryptjs": "^2.4.3",
35 changes: 34 additions & 1 deletion api/server/controllers/agents/callbacks.js
@@ -3,6 +3,7 @@ const {
   EnvVar,
   Providers,
   GraphEvents,
+  getMessageId,
   ToolEndHandler,
   handleToolCalls,
   ChatModelStreamHandler,
@@ -46,7 +47,7 @@
    }

    try {
-      if (metadata.provider === Providers.GOOGLE) {
+      if (metadata.provider === Providers.GOOGLE || graph.clientOptions?.disableStreaming) {
        handleToolCalls(data?.output?.tool_calls, metadata, graph);
      }
@@ -59,6 +60,38 @@
      }

      this.collectedUsage.push(usage);
+      if (!graph.clientOptions?.disableStreaming) {
+        return;
+      }
+      if (!data.output.content) {
+        return;
+      }
+      const stepKey = graph.getStepKey(metadata);
+      const message_id = getMessageId(stepKey, graph) ?? '';
+      if (message_id) {
+        graph.dispatchRunStep(stepKey, {
+          type: StepTypes.MESSAGE_CREATION,
+          message_creation: {
+            message_id,
+          },
+        });
+      }
+      const stepId = graph.getStepIdByKey(stepKey);
+      const content = data.output.content;
+      if (typeof content === 'string') {
+        graph.dispatchMessageDelta(stepId, {
+          content: [
+            {
+              type: 'text',
+              text: content,
+            },
+          ],
+        });
+      } else if (content.every((c) => c.type?.startsWith('text'))) {
+        graph.dispatchMessageDelta(stepId, {
+          content,
+        });
+      }
    } catch (error) {
      logger.error('Error handling model end event:', error);
    }
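When streaming is disabled (the o1 path in run.js below), the model's entire output arrives in this single model-end event, so the handler replays what streaming would have emitted: tool calls, a message-creation run step, and one message delta carrying the full content. A sketch of the content normalization performed above, with the delta shape assumed for illustration:

    // String content is wrapped as a single text part; arrays pass through
    // only when every part is text-typed (shapes assumed).
    type ContentPart = { type?: string; text?: string };

    function toMessageDelta(content: string | ContentPart[]) {
      if (typeof content === 'string') {
        return { content: [{ type: 'text', text: content }] };
      }
      if (content.every((c) => c.type?.startsWith('text'))) {
        return { content };
      }
      return null; // mixed or non-text parts are not dispatched here
    }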
5 changes: 5 additions & 0 deletions api/server/controllers/agents/run.js
@@ -41,6 +41,11 @@ async function createRun({
     agent.model_parameters,
   );

+  if (/o1(?!-(?:mini|preview)).*$/.test(llmConfig.model)) {
+    llmConfig.streaming = false;
+    llmConfig.disableStreaming = true;
+  }
+
   /** @type {StandardGraphConfig} */
   const graphConfig = {
     signal,
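The regex gates the non-streaming path to "full" o1 models, which did not support streaming at the time, while o1-mini and o1-preview keep streaming. How the pattern classifies model names:

    const isFullO1 = (model: string) => /o1(?!-(?:mini|preview)).*$/.test(model);

    isFullO1('o1');            // true  -> streaming disabled
    isFullO1('o1-2024-12-17'); // true  -> streaming disabled
    isFullO1('o1-mini');       // false -> streams as before
    isFullO1('o1-preview');    // false -> streams as before

Note the pattern is unanchored, so it keys on any `o1` substring not immediately followed by `-mini` or `-preview`.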
17 changes: 1 addition & 16 deletions client/src/components/Artifacts/Artifact.tsx
@@ -6,8 +6,8 @@ import type { Pluggable } from 'unified';
 import type { Artifact } from '~/common';
 import { useMessageContext, useArtifactContext } from '~/Providers';
 import { artifactsState } from '~/store/artifacts';
+import { logger, extractContent } from '~/utils';
 import ArtifactButton from './ArtifactButton';
-import { logger } from '~/utils';

 export const artifactPlugin: Pluggable = () => {
   return (tree) => {
@@ -22,21 +22,6 @@ export const artifactPlugin: Pluggable = () => {
   };
 };

-const extractContent = (
-  children: React.ReactNode | { props: { children: React.ReactNode } } | string,
-): string => {
-  if (typeof children === 'string') {
-    return children;
-  }
-  if (React.isValidElement(children)) {
-    return extractContent((children.props as { children?: React.ReactNode }).children);
-  }
-  if (Array.isArray(children)) {
-    return children.map(extractContent).join('');
-  }
-  return '';
-};
-
 export function Artifact({
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   node,
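The helper removed here is now imported from `~/utils` (see the import change above); its behavior is unchanged by the move: it flattens arbitrarily nested React children into a single string. Illustrated with hypothetical inputs:

    import React from 'react';
    import { extractContent } from '~/utils';

    const nested = React.createElement('span', null, 'nested ', React.createElement('b', null, 'text'));

    extractContent('plain');    // 'plain'
    extractContent(['a', 'b']); // 'ab'
    extractContent(nested);     // 'nested text'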
13 changes: 8 additions & 5 deletions client/src/components/Artifacts/Thinking.tsx
@@ -3,6 +3,7 @@ import { useRecoilValue } from 'recoil';
 import { Atom, ChevronDown } from 'lucide-react';
 import type { MouseEvent, FC } from 'react';
 import { useLocalize } from '~/hooks';
+import { cn } from '~/utils';
 import store from '~/store';

 const BUTTON_STYLES = {
@@ -63,19 +64,21 @@ const Thinking: React.ElementType = memo(({ children }: { children: React.ReactNode }) => {
   }

   return (
-    <div className="mb-3">
-      <ThinkingButton isExpanded={isExpanded} onClick={handleClick} label={label} />
+    <>
+      <div className="mb-5">
+        <ThinkingButton isExpanded={isExpanded} onClick={handleClick} label={label} />
+      </div>
       <div
-        className="grid transition-all duration-300 ease-out"
+        className={cn('grid transition-all duration-300 ease-out', isExpanded && 'mb-8')}
         style={{
           gridTemplateRows: isExpanded ? '1fr' : '0fr',
         }}
       >
         <div className="overflow-hidden">
-          <ThinkingContent>{children}</ThinkingContent>
+          <ThinkingContent isPart={true}>{children}</ThinkingContent>
         </div>
       </div>
-    </div>
+    </>
   );
 });

2 changes: 0 additions & 2 deletions client/src/components/Chat/Messages/Content/Markdown.tsx
@@ -17,7 +17,6 @@ import {
 import { Artifact, artifactPlugin } from '~/components/Artifacts/Artifact';
 import { langSubset, preprocessLaTeX, handleDoubleClick } from '~/utils';
 import CodeBlock from '~/components/Messages/Content/CodeBlock';
-import Thinking from '~/components/Artifacts/Thinking';
 import { useFileDownload } from '~/data-provider';
 import useLocalize from '~/hooks/useLocalize';
 import store from '~/store';
@@ -223,7 +222,6 @@ const Markdown = memo(({ content = '', showCursor, isLatestMessage }: TContentProps) => {
       a,
       p,
       artifact: Artifact,
-      thinking: Thinking,
     } as {
       [nodeType: string]: React.ElementType;
     }
106 changes: 41 additions & 65 deletions client/src/components/Chat/Messages/Content/MessageContent.tsx
@@ -1,9 +1,9 @@
-import { Fragment, Suspense, useMemo } from 'react';
+import { memo, Suspense, useMemo } from 'react';
 import { useRecoilValue } from 'recoil';
-import type { TMessage, TResPlugin } from 'librechat-data-provider';
+import type { TMessage } from 'librechat-data-provider';
 import type { TMessageContentProps, TDisplayProps } from '~/common';
-import Plugin from '~/components/Messages/Content/Plugin';
 import Error from '~/components/Messages/Content/Error';
+import Thinking from '~/components/Artifacts/Thinking';
 import { DelayedRender } from '~/components/ui';
 import { useChatContext } from '~/Providers';
 import MarkdownLite from './MarkdownLite';
@@ -117,7 +117,6 @@ export const UnfinishedMessage = ({ message }: { message: TMessage }) => (
   />
 );

-// Content Component
 const MessageContent = ({
   text,
   edit,
@@ -127,72 +126,49 @@ const MessageContent = ({
   isLast,
   ...props
 }: TMessageContentProps) => {
-  if (error) {
-    return <ErrorMessage message={props.message} text={text} />;
-  } else if (edit) {
-    return <EditMessage text={text} isSubmitting={isSubmitting} {...props} />;
-  } else {
-    const marker = ':::plugin:::\n';
-    const splitText = text.split(marker);
-    const { message } = props;
-    const { plugins, messageId } = message;
-    const displayedIndices = new Set<number>();
-    // Function to get the next non-empty text index
-    const getNextNonEmptyTextIndex = (currentIndex: number) => {
-      for (let i = currentIndex + 1; i < splitText.length; i++) {
-        // Allow the last index to be last in case it has text
-        // this may need to change if I add back streaming
-        if (i === splitText.length - 1) {
-          return currentIndex;
-        }
-
-        if (splitText[i].trim() !== '' && !displayedIndices.has(i)) {
-          return i;
-        }
-      }
-      return currentIndex; // If no non-empty text is found, return the current index
-    };
+  const { message } = props;
+  const { messageId } = message;

-    return splitText.map((text, idx) => {
-      let currentText = text.trim();
-      let plugin: TResPlugin | null = null;
-
-      if (plugins) {
-        plugin = plugins[idx];
-      }
-
-      // If the current text is empty, get the next non-empty text index
-      const displayTextIndex = currentText === '' ? getNextNonEmptyTextIndex(idx) : idx;
-      currentText = splitText[displayTextIndex];
-      const isLastIndex = displayTextIndex === splitText.length - 1;
-      const isEmpty = currentText.trim() === '';
-      const showText =
-        (currentText && !isEmpty && !displayedIndices.has(displayTextIndex)) ||
-        (isEmpty && isLastIndex);
-      displayedIndices.add(displayTextIndex);
+  const { thinkingContent, regularContent } = useMemo(() => {
+    const thinkingMatch = text.match(/:::thinking([\s\S]*?):::/);
+    return {
+      thinkingContent: thinkingMatch ? thinkingMatch[1].trim() : '',
+      regularContent: thinkingMatch ? text.replace(/:::thinking[\s\S]*?:::/, '').trim() : text,
+    };
+  }, [text]);

-      return (
-        <Fragment key={idx}>
-          {plugin && <Plugin key={`plugin-${messageId}-${idx}`} plugin={plugin} />}
-          {showText ? (
-            <DisplayMessage
-              key={`display-${messageId}-${idx}`}
-              showCursor={isLastIndex && isLast && isSubmitting}
-              text={currentText}
-              {...props}
-            />
-          ) : null}
-          {!isSubmitting && unfinished && (
-            <Suspense>
-              <DelayedRender delay={250}>
-                <UnfinishedMessage message={message} key={`unfinished-${messageId}-${idx}`} />
-              </DelayedRender>
-            </Suspense>
-          )}
-        </Fragment>
-      );
-    });
-  }
+  const showRegularCursor = useMemo(() => isLast && isSubmitting, [isLast, isSubmitting]);
+
+  const unfinishedMessage = useMemo(
+    () =>
+      !isSubmitting && unfinished ? (
+        <Suspense>
+          <DelayedRender delay={250}>
+            <UnfinishedMessage message={message} />
+          </DelayedRender>
+        </Suspense>
+      ) : null,
+    [isSubmitting, unfinished, message],
+  );
+
+  if (error) {
+    return <ErrorMessage message={props.message} text={text} />;
+  } else if (edit) {
+    return <EditMessage text={text} isSubmitting={isSubmitting} {...props} />;
+  }
+
+  return (
+    <>
+      {thinkingContent && <Thinking key={`thinking-${messageId}`}>{thinkingContent}</Thinking>}
+      <DisplayMessage
+        key={`display-${messageId}`}
+        showCursor={showRegularCursor}
+        text={regularContent}
+        {...props}
+      />
+      {unfinishedMessage}
+    </>
+  );
 };

-export default MessageContent;
+export default memo(MessageContent);
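The `useMemo` above replaces the old `:::plugin:::` splitting wholesale: one regex pass lifts an optional `:::thinking…:::` block out of the message text so it can render in the collapsible Thinking component. Worked through on a hypothetical message:

    const text = ':::thinking Weighing both options… ::: The answer is 42.';
    const thinkingMatch = text.match(/:::thinking([\s\S]*?):::/);
    const thinkingContent = thinkingMatch ? thinkingMatch[1].trim() : '';
    // 'Weighing both options…'
    const regularContent = thinkingMatch
      ? text.replace(/:::thinking[\s\S]*?:::/, '').trim()
      : text; // 'The answer is 42.'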
2 changes: 1 addition & 1 deletion client/src/components/Chat/Messages/Content/Parts/Reasoning.tsx
@@ -18,7 +18,7 @@ const Reasoning = memo(({ reasoning }: ReasoningProps) => {
       <div
         className={cn(
           'grid transition-all duration-300 ease-out',
-          nextType !== ContentTypes.THINK && isExpanded && 'mb-10',
+          nextType !== ContentTypes.THINK && isExpanded && 'mb-8',
         )}
         style={{
           gridTemplateRows: isExpanded ? '1fr' : '0fr',
16 changes: 16 additions & 0 deletions client/src/components/SidePanel/Parameters/settings.ts
@@ -3,6 +3,7 @@ import {
   EModelEndpoint,
   openAISettings,
   googleSettings,
+  ReasoningEffort,
   BedrockProviders,
   anthropicSettings,
 } from 'librechat-data-provider';
@@ -203,6 +204,19 @@
     optionType: 'model',
     columnSpan: 2,
   },
+  reasoning_effort: {
+    key: 'reasoning_effort',
+    label: 'com_endpoint_reasoning_effort',
+    labelCode: true,
+    description: 'com_endpoint_openai_reasoning_effort',
+    descriptionCode: true,
+    type: 'enum',
+    default: ReasoningEffort.medium,
+    component: 'slider',
+    options: [ReasoningEffort.low, ReasoningEffort.medium, ReasoningEffort.high],
+    optionType: 'model',
+    columnSpan: 4,
+  },
 };

 const anthropic: Record<string, SettingDefinition> = {
@@ -446,13 +460,15 @@
   baseDefinitions.stop,
   librechat.resendFiles,
   baseDefinitions.imageDetail,
+  openAIParams.reasoning_effort,
 ];

 const openAICol1: SettingsConfiguration = [
   baseDefinitions.model as SettingDefinition,
   openAIParams.chatGptLabel,
   librechat.promptPrefix,
   librechat.maxContextTokens,
+  openAIParams.reasoning_effort,
 ];

 const openAICol2: SettingsConfiguration = [
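The new `enum` definition renders as a slider and maps onto OpenAI's `reasoning_effort` request parameter; a sketch of the request body that ultimately results (payload abridged, shape assumed for illustration):

    // Illustrative chat-completions body once the slider is set:
    const body = {
      model: 'o1',
      reasoning_effort: 'medium', // default, per ReasoningEffort.medium above
      messages: [{ role: 'user', content: 'Plan a proof sketch.' }],
    };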
3 changes: 3 additions & 0 deletions client/src/localization/languages/Eng.ts
@@ -578,6 +578,7 @@ export default {
   com_endpoint_top_k: 'Top K',
   com_endpoint_max_output_tokens: 'Max Output Tokens',
   com_endpoint_stop: 'Stop Sequences',
+  com_endpoint_reasoning_effort: 'Reasoning Effort',
   com_endpoint_stop_placeholder: 'Separate values by pressing `Enter`',
   com_endpoint_openai_max_tokens: `Optional \`max_tokens\` field, representing the maximum number of tokens that can be generated in the chat completion.
@@ -596,6 +597,8 @@ export default {
     'Resend all previously attached images. Note: this can significantly increase token cost and you may experience errors with many image attachments.',
   com_endpoint_openai_resend_files:
     'Resend all previously attached files. Note: this will increase token cost and you may experience errors with many attachments.',
+  com_endpoint_openai_reasoning_effort:
+    'o1 models only: constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.',
   com_endpoint_openai_detail:
     'The resolution for Vision requests. "Low" is cheaper and faster, "High" is more detailed and expensive, and "Auto" will automatically choose between the two based on the image resolution.',
   com_endpoint_openai_stop: 'Up to 4 sequences where the API will stop generating further tokens.',