# [Obs AI Assistant] Handle token limit error (elastic#175871)
When the user reached the token limit, they would previously see a
generic toast. This PR improves that case by handling the error and
showing a more user-friendly message.


### Before
<img width="1484" alt="image"
src="https://github.com/elastic/kibana/assets/209966/de6559fe-a98b-4c99-8995-8d43d1a56032">


### After
<img width="1502" alt="image"
src="https://github.com/elastic/kibana/assets/209966/2a17117f-aa66-45e1-83dd-59c8133814c7">

### Possible additional enhancements

- Disable the prompt to avoid new messages that will fail anyway (a rough sketch follows below)
- Show an error icon instead of the normal assistant avatar
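
For the first idea, a hypothetical sketch of how the prompt could be gated on a previous token-limit error. Nothing here is part of this commit: the import path, the `ChatStatus` type, and the `lastError` value are all assumptions; only `isTokenLimitReachedError` is the type guard added in the diffs below.

```ts
// Hypothetical sketch only; not part of this PR.
// `isTokenLimitReachedError` is the type guard introduced in this commit;
// the import path and the remaining names are illustrative assumptions.
import { isTokenLimitReachedError } from '../../common/conversation_complete';

type ChatStatus = 'ready' | 'loading' | 'aborted' | 'error';

export function isPromptDisabled(status: ChatStatus, lastError?: Error): boolean {
  // Once the conversation has hit the token limit, any follow-up message would
  // fail for the same reason, so the input could stay disabled until the user
  // starts a new conversation.
  return status === 'error' && lastError !== undefined && isTokenLimitReachedError(lastError);
}
```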
sorenlouv authored Feb 1, 2024
1 parent 4a2afa6 commit 83e76cb
Showing 6 changed files with 84 additions and 24 deletions.
```diff
@@ -66,7 +66,14 @@ export type MessageAddEvent = StreamingChatResponseEventBase<
 
 export type ChatCompletionErrorEvent = StreamingChatResponseEventBase<
   StreamingChatResponseEventType.ChatCompletionError,
-  { error: { message: string; stack?: string; code?: ChatCompletionErrorCode } }
+  {
+    error: {
+      message: string;
+      stack?: string;
+      code?: ChatCompletionErrorCode;
+      meta?: Record<string, any>;
+    };
+  }
 >;
 
 export type StreamingChatResponseEvent =
@@ -83,33 +90,41 @@ export type StreamingChatResponseEventWithoutError = Exclude<
 
 export enum ChatCompletionErrorCode {
   InternalError = 'internalError',
-  NotFound = 'notFound',
+  NotFoundError = 'notFoundError',
+  TokenLimitReachedError = 'tokenLimitReachedError',
 }
 
-export class ChatCompletionError extends Error {
-  code: ChatCompletionErrorCode;
+interface ErrorMetaAttributes {
+  [ChatCompletionErrorCode.InternalError]: {};
+  [ChatCompletionErrorCode.NotFoundError]: {};
+  [ChatCompletionErrorCode.TokenLimitReachedError]: {
+    tokenLimit?: number;
+    tokenCount?: number;
+  };
+}
 
-  constructor(code: ChatCompletionErrorCode, message: string) {
+export class ChatCompletionError<T extends ChatCompletionErrorCode> extends Error {
+  constructor(public code: T, message: string, public meta?: ErrorMetaAttributes[T]) {
     super(message);
-    this.code = code;
   }
 }
 
-export function createConversationNotFoundError() {
+export function createTokenLimitReachedError(tokenLimit?: number, tokenCount?: number) {
   return new ChatCompletionError(
-    ChatCompletionErrorCode.NotFound,
-    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.conversationNotFoundError', {
-      defaultMessage: 'Conversation not found',
-    })
+    ChatCompletionErrorCode.TokenLimitReachedError,
+    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.tokenLimitReachedError', {
+      defaultMessage: `Token limit reached. Token limit is {tokenLimit}, but the current conversation has {tokenCount} tokens.`,
+      values: { tokenLimit, tokenCount },
+    }),
+    { tokenLimit, tokenCount }
   );
 }
 
-export function createTokenLimitReachedError() {
+export function createConversationNotFoundError() {
   return new ChatCompletionError(
-    ChatCompletionErrorCode.TokenLimitReachedError,
-    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.tokenLimitReachedError', {
-      defaultMessage: 'Token limit reached',
+    ChatCompletionErrorCode.NotFoundError,
+    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.conversationNotFoundError', {
+      defaultMessage: 'Conversation not found',
     })
   );
 }
@@ -118,6 +133,15 @@ export function createInternalServerError(originalErrorMessage: string) {
   return new ChatCompletionError(ChatCompletionErrorCode.InternalError, originalErrorMessage);
 }
 
-export function isChatCompletionError(error: Error): error is ChatCompletionError {
+export function isTokenLimitReachedError(
+  error: Error
+): error is ChatCompletionError<ChatCompletionErrorCode.TokenLimitReachedError> {
+  return (
+    error instanceof ChatCompletionError &&
+    error.code === ChatCompletionErrorCode.TokenLimitReachedError
+  );
+}
+
+export function isChatCompletionError(error: Error): error is ChatCompletionError<any> {
   return error instanceof ChatCompletionError;
 }
```
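
Taken together, the new pieces are easier to see in a condensed, self-contained sketch. The i18n call is replaced with a template string and only the token-limit path is exercised, so this paraphrases the module above rather than reproducing it; the example numbers come from the functional test at the end of this commit.

```ts
// Simplified paraphrase of the new conversation_complete error helpers.
enum ChatCompletionErrorCode {
  InternalError = 'internalError',
  NotFoundError = 'notFoundError',
  TokenLimitReachedError = 'tokenLimitReachedError',
}

interface ErrorMetaAttributes {
  [ChatCompletionErrorCode.InternalError]: {};
  [ChatCompletionErrorCode.NotFoundError]: {};
  [ChatCompletionErrorCode.TokenLimitReachedError]: { tokenLimit?: number; tokenCount?: number };
}

class ChatCompletionError<T extends ChatCompletionErrorCode> extends Error {
  constructor(public code: T, message: string, public meta?: ErrorMetaAttributes[T]) {
    super(message);
  }
}

function createTokenLimitReachedError(tokenLimit?: number, tokenCount?: number) {
  return new ChatCompletionError(
    ChatCompletionErrorCode.TokenLimitReachedError,
    `Token limit reached. Token limit is ${tokenLimit}, but the current conversation has ${tokenCount} tokens.`,
    { tokenLimit, tokenCount }
  );
}

function isTokenLimitReachedError(
  error: Error
): error is ChatCompletionError<ChatCompletionErrorCode.TokenLimitReachedError> {
  return (
    error instanceof ChatCompletionError &&
    error.code === ChatCompletionErrorCode.TokenLimitReachedError
  );
}

// Usage: the meta attributes are typed per error code, so the token counts are
// available without casting once the guard has narrowed the error.
try {
  throw createTokenLimitReachedError(8192, 11036);
} catch (error) {
  if (error instanceof Error && isTokenLimitReachedError(error)) {
    console.log(error.meta?.tokenLimit, error.meta?.tokenCount);
  }
}
```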
```diff
@@ -20,10 +20,12 @@ export function throwSerializedChatCompletionErrors() {
     ): Observable<Exclude<T, ChatCompletionErrorEvent>> => {
       return source$.pipe(
         tap((event) => {
+          // de-serialise error
           if (event.type === StreamingChatResponseEventType.ChatCompletionError) {
             const code = event.error.code ?? ChatCompletionErrorCode.InternalError;
             const message = event.error.message;
-            throw new ChatCompletionError(code, message);
+            const meta = event.error.meta;
+            throw new ChatCompletionError(code, message, meta);
           }
         }),
         filter(
```
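
The `meta` payload only reaches the browser because the server flattens the error onto the `ChatCompletionError` event (see the `observableIntoStream` change further down) and this operator re-attaches it when re-throwing. A minimal sketch of that round trip, with the real streaming event types and RxJS plumbing replaced by plain objects and functions:

```ts
// Minimal sketch of the serialize/deserialize round trip for chat errors.
// The shapes are trimmed-down stand-ins for the real streaming event types.
interface SerializedChatCompletionError {
  type: 'chatCompletionError';
  error: { message: string; stack?: string; code?: string; meta?: Record<string, any> };
}

type ErrorWithMeta = Error & { code?: string; meta?: Record<string, any> };

// Server side (observableIntoStream): flatten the thrown error into an event.
function serializeError(error: ErrorWithMeta): SerializedChatCompletionError {
  return {
    type: 'chatCompletionError',
    error: { message: error.message, stack: error.stack, code: error.code, meta: error.meta },
  };
}

// Client side (throwSerializedChatCompletionErrors): rebuild a typed error so
// callers such as useChat can inspect `code` and `meta` again.
function deserializeError(event: SerializedChatCompletionError): ErrorWithMeta {
  const error: ErrorWithMeta = new Error(event.error.message);
  error.code = event.error.code ?? 'internalError';
  error.meta = event.error.meta;
  return error;
}

const original: ErrorWithMeta = Object.assign(new Error('Token limit reached'), {
  code: 'tokenLimitReachedError',
  meta: { tokenLimit: 8192, tokenCount: 11036 },
});
const roundTripped = deserializeError(serializeError(original));
console.log(roundTripped.code, roundTripped.meta); // 'tokenLimitReachedError' { tokenLimit: 8192, tokenCount: 11036 }
```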
x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.ts (26 additions & 5 deletions)

```diff
@@ -13,6 +13,7 @@ import { MessageRole, type Message } from '../../common';
 import {
   ConversationCreateEvent,
   ConversationUpdateEvent,
+  isTokenLimitReachedError,
   StreamingChatResponseEventType,
 } from '../../common/conversation_complete';
 import { getAssistantSetupMessage } from '../service/get_assistant_setup_message';
@@ -95,16 +96,36 @@ export function useChat({
 
   const handleError = useCallback(
     (error: Error) => {
-      notifications.toasts.addError(error, {
-        title: i18n.translate('xpack.observabilityAiAssistant.failedToLoadResponse', {
-          defaultMessage: 'Failed to load response from the AI Assistant',
-        }),
-      });
       if (error instanceof AbortError) {
         setChatState(ChatState.Aborted);
       } else {
         setChatState(ChatState.Error);
       }
+
+      if (isTokenLimitReachedError(error)) {
+        setMessages((msgs) => [
+          ...msgs,
+          {
+            '@timestamp': new Date().toISOString(),
+            message: {
+              content: i18n.translate('xpack.observabilityAiAssistant.tokenLimitError', {
+                defaultMessage:
+                  'The conversation has exceeded the token limit. The maximum token limit is **{tokenLimit}**, but the current conversation has **{tokenCount}** tokens. Please start a new conversation to continue.',
+                values: { tokenLimit: error.meta?.tokenLimit, tokenCount: error.meta?.tokenCount },
+              }),
+              role: MessageRole.Assistant,
+            },
+          },
+        ]);
+
+        return;
+      }
+
+      notifications.toasts.addError(error, {
+        title: i18n.translate('xpack.observabilityAiAssistant.failedToLoadResponse', {
+          defaultMessage: 'Failed to load response from the AI Assistant',
+        }),
+      });
     },
     [notifications.toasts]
   );
```
```diff
@@ -22,6 +22,7 @@ import {
   createConversationNotFoundError,
   MessageAddEvent,
   StreamingChatResponseEventType,
+  createTokenLimitReachedError,
   type StreamingChatResponseEvent,
 } from '../../../common/conversation_complete';
 import {
@@ -459,6 +460,17 @@ export class ObservabilityAIAssistantClient {
           },
         });
 
+        if (executeResult.status === 'error' && executeResult?.serviceMessage) {
+          const tokenLimitRegex =
+            /This model's maximum context length is (\d+) tokens\. However, your messages resulted in (\d+) tokens/g;
+          const tokenLimitRegexResult = tokenLimitRegex.exec(executeResult.serviceMessage);
+
+          if (tokenLimitRegexResult) {
+            const [, tokenLimit, tokenCount] = tokenLimitRegexResult;
+            throw createTokenLimitReachedError(parseInt(tokenLimit, 10), parseInt(tokenCount, 10));
+          }
+        }
+
         if (executeResult.status === 'error') {
           throw internal(`${executeResult?.message} - ${executeResult?.serviceMessage}`);
         }
```
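
The token numbers come from parsing the connector's `serviceMessage`. A small self-contained sketch of that extraction, using the OpenAI-style wording the regex above targets; the message text in the usage example is taken from the functional test below.

```ts
// Extract token limit and token count from an OpenAI-style context-length error.
// Returns undefined when the message does not match, so other errors fall
// through to the generic internal-error handling.
const TOKEN_LIMIT_REGEX =
  /This model's maximum context length is (\d+) tokens\. However, your messages resulted in (\d+) tokens/;

function parseTokenLimitError(
  serviceMessage: string
): { tokenLimit: number; tokenCount: number } | undefined {
  const match = TOKEN_LIMIT_REGEX.exec(serviceMessage);
  if (!match) {
    return undefined;
  }
  const [, tokenLimit, tokenCount] = match;
  return { tokenLimit: parseInt(tokenLimit, 10), tokenCount: parseInt(tokenCount, 10) };
}

// Example message taken from the functional test below.
console.log(
  parseTokenLimitError(
    "This model's maximum context length is 8192 tokens. However, your messages resulted in 11036 tokens. Please reduce the length of the messages."
  )
); // { tokenLimit: 8192, tokenCount: 11036 }
```

Unlike the diff above, this sketch drops the `g` flag so the shared regex does not carry `lastIndex` state between calls.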
```diff
@@ -29,6 +29,7 @@ export function observableIntoStream(
           message: error.message,
           stack: error.stack,
           code: isChatCompletionError(error) ? error.code : undefined,
+          meta: error.meta,
         },
         type: StreamingChatResponseEventType.ChatCompletionError,
       };
```
```diff
@@ -170,8 +170,8 @@ export default function ApiTest({ getService }: FtrProviderContext) {
 
       const response = JSON.parse(data);
 
-      expect(response.message).to.contain(
-        `an error occurred while running the action - Status code: 400. Message: API Error: Bad Request - This model's maximum context length is 8192 tokens. However, your messages resulted in 11036 tokens. Please reduce the length of the messages.`
+      expect(response.message).to.be(
+        `Token limit reached. Token limit is 8192, but the current conversation has 11036 tokens.`
       );
     });
 
```
