# [Obs AI Assistant] Handle token limit error (elastic#175871)
When the user reached the token limit, they would previously see a
generic toast. This PR improves that case by handling the error and
showing a more user-friendly message.


### Before
<img width="1484" alt="image"
src="https://github.com/elastic/kibana/assets/209966/de6559fe-a98b-4c99-8995-8d43d1a56032">


### After
<img width="1502" alt="image"
src="https://github.com/elastic/kibana/assets/209966/2a17117f-aa66-45e1-83dd-59c8133814c7">

### Possible additional enhancements

- Disable the prompt to avoid new messages that will fail anyway (a rough sketch follows below)
- Show an error icon instead of the normal assistant avatar
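
For the first idea, a hypothetical sketch of how the prompt could be gated on a previous token-limit error. Nothing here is part of this commit: the import path, the `ChatStatus` type, and the `lastError` value are all assumptions; only `isTokenLimitReachedError` is the type guard added in the diffs below.

```ts
// Hypothetical sketch only; not part of this PR.
// `isTokenLimitReachedError` is the type guard introduced in this commit;
// the import path and the remaining names are illustrative assumptions.
import { isTokenLimitReachedError } from '../../common/conversation_complete';

type ChatStatus = 'ready' | 'loading' | 'aborted' | 'error';

export function isPromptDisabled(status: ChatStatus, lastError?: Error): boolean {
  // Once the conversation has hit the token limit, any follow-up message would
  // fail for the same reason, so the input could stay disabled until the user
  // starts a new conversation.
  return status === 'error' && lastError !== undefined && isTokenLimitReachedError(lastError);
}
```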
sorenlouv authored Feb 1, 2024
1 parent 4a2afa6 commit 83e76cb
Showing 6 changed files with 84 additions and 24 deletions.
```diff
@@ -66,7 +66,14 @@ export type MessageAddEvent = StreamingChatResponseEventBase<
 
 export type ChatCompletionErrorEvent = StreamingChatResponseEventBase<
   StreamingChatResponseEventType.ChatCompletionError,
-  { error: { message: string; stack?: string; code?: ChatCompletionErrorCode } }
+  {
+    error: {
+      message: string;
+      stack?: string;
+      code?: ChatCompletionErrorCode;
+      meta?: Record<string, any>;
+    };
+  }
 >;
 
 export type StreamingChatResponseEvent =
@@ -83,33 +90,41 @@ export type StreamingChatResponseEventWithoutError = Exclude<
 
 export enum ChatCompletionErrorCode {
   InternalError = 'internalError',
-  NotFound = 'notFound',
+  NotFoundError = 'notFoundError',
+  TokenLimitReachedError = 'tokenLimitReachedError',
 }
 
-export class ChatCompletionError extends Error {
-  code: ChatCompletionErrorCode;
+interface ErrorMetaAttributes {
+  [ChatCompletionErrorCode.InternalError]: {};
+  [ChatCompletionErrorCode.NotFoundError]: {};
+  [ChatCompletionErrorCode.TokenLimitReachedError]: {
+    tokenLimit?: number;
+    tokenCount?: number;
+  };
+}
 
-  constructor(code: ChatCompletionErrorCode, message: string) {
+export class ChatCompletionError<T extends ChatCompletionErrorCode> extends Error {
+  constructor(public code: T, message: string, public meta?: ErrorMetaAttributes[T]) {
     super(message);
-    this.code = code;
   }
 }
 
-export function createConversationNotFoundError() {
+export function createTokenLimitReachedError(tokenLimit?: number, tokenCount?: number) {
   return new ChatCompletionError(
-    ChatCompletionErrorCode.NotFound,
-    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.conversationNotFoundError', {
-      defaultMessage: 'Conversation not found',
-    })
+    ChatCompletionErrorCode.TokenLimitReachedError,
+    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.tokenLimitReachedError', {
+      defaultMessage: `Token limit reached. Token limit is {tokenLimit}, but the current conversation has {tokenCount} tokens.`,
+      values: { tokenLimit, tokenCount },
+    }),
+    { tokenLimit, tokenCount }
   );
 }
 
-export function createTokenLimitReachedError() {
+export function createConversationNotFoundError() {
   return new ChatCompletionError(
-    ChatCompletionErrorCode.TokenLimitReachedError,
-    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.tokenLimitReachedError', {
-      defaultMessage: 'Token limit reached',
+    ChatCompletionErrorCode.NotFoundError,
+    i18n.translate('xpack.observabilityAiAssistant.chatCompletionError.conversationNotFoundError', {
+      defaultMessage: 'Conversation not found',
     })
   );
 }
@@ -118,6 +133,15 @@ export function createInternalServerError(originalErrorMessage: string) {
   return new ChatCompletionError(ChatCompletionErrorCode.InternalError, originalErrorMessage);
 }
 
-export function isChatCompletionError(error: Error): error is ChatCompletionError {
+export function isTokenLimitReachedError(
+  error: Error
+): error is ChatCompletionError<ChatCompletionErrorCode.TokenLimitReachedError> {
+  return (
+    error instanceof ChatCompletionError &&
+    error.code === ChatCompletionErrorCode.TokenLimitReachedError
+  );
+}
+
+export function isChatCompletionError(error: Error): error is ChatCompletionError<any> {
   return error instanceof ChatCompletionError;
 }
```
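
Taken together, the new pieces are easier to see in a condensed, self-contained sketch. The i18n call is replaced with a template string and only the token-limit path is exercised, so this paraphrases the module above rather than reproducing it; the example numbers come from the functional test at the end of this commit.

```ts
// Simplified paraphrase of the new conversation_complete error helpers.
enum ChatCompletionErrorCode {
  InternalError = 'internalError',
  NotFoundError = 'notFoundError',
  TokenLimitReachedError = 'tokenLimitReachedError',
}

interface ErrorMetaAttributes {
  [ChatCompletionErrorCode.InternalError]: {};
  [ChatCompletionErrorCode.NotFoundError]: {};
  [ChatCompletionErrorCode.TokenLimitReachedError]: { tokenLimit?: number; tokenCount?: number };
}

class ChatCompletionError<T extends ChatCompletionErrorCode> extends Error {
  constructor(public code: T, message: string, public meta?: ErrorMetaAttributes[T]) {
    super(message);
  }
}

function createTokenLimitReachedError(tokenLimit?: number, tokenCount?: number) {
  return new ChatCompletionError(
    ChatCompletionErrorCode.TokenLimitReachedError,
    `Token limit reached. Token limit is ${tokenLimit}, but the current conversation has ${tokenCount} tokens.`,
    { tokenLimit, tokenCount }
  );
}

function isTokenLimitReachedError(
  error: Error
): error is ChatCompletionError<ChatCompletionErrorCode.TokenLimitReachedError> {
  return (
    error instanceof ChatCompletionError &&
    error.code === ChatCompletionErrorCode.TokenLimitReachedError
  );
}

// Usage: the meta attributes are typed per error code, so the token counts are
// available without casting once the guard has narrowed the error.
try {
  throw createTokenLimitReachedError(8192, 11036);
} catch (error) {
  if (error instanceof Error && isTokenLimitReachedError(error)) {
    console.log(error.meta?.tokenLimit, error.meta?.tokenCount);
  }
}
```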
```diff
@@ -20,10 +20,12 @@ export function throwSerializedChatCompletionErrors() {
     ): Observable<Exclude<T, ChatCompletionErrorEvent>> => {
       return source$.pipe(
         tap((event) => {
+          // de-serialise error
           if (event.type === StreamingChatResponseEventType.ChatCompletionError) {
             const code = event.error.code ?? ChatCompletionErrorCode.InternalError;
             const message = event.error.message;
-            throw new ChatCompletionError(code, message);
+            const meta = event.error.meta;
+            throw new ChatCompletionError(code, message, meta);
           }
         }),
         filter(
```
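
The `meta` payload only reaches the browser because the server flattens the error onto the `ChatCompletionError` event (see the `observableIntoStream` change further down) and this operator re-attaches it when re-throwing. A minimal sketch of that round trip, with the real streaming event types and RxJS plumbing replaced by plain objects and functions:

```ts
// Minimal sketch of the serialize/deserialize round trip for chat errors.
// The shapes are trimmed-down stand-ins for the real streaming event types.
interface SerializedChatCompletionError {
  type: 'chatCompletionError';
  error: { message: string; stack?: string; code?: string; meta?: Record<string, any> };
}

type ErrorWithMeta = Error & { code?: string; meta?: Record<string, any> };

// Server side (observableIntoStream): flatten the thrown error into an event.
function serializeError(error: ErrorWithMeta): SerializedChatCompletionError {
  return {
    type: 'chatCompletionError',
    error: { message: error.message, stack: error.stack, code: error.code, meta: error.meta },
  };
}

// Client side (throwSerializedChatCompletionErrors): rebuild a typed error so
// callers such as useChat can inspect `code` and `meta` again.
function deserializeError(event: SerializedChatCompletionError): ErrorWithMeta {
  const error: ErrorWithMeta = new Error(event.error.message);
  error.code = event.error.code ?? 'internalError';
  error.meta = event.error.meta;
  return error;
}

const original: ErrorWithMeta = Object.assign(new Error('Token limit reached'), {
  code: 'tokenLimitReachedError',
  meta: { tokenLimit: 8192, tokenCount: 11036 },
});
const roundTripped = deserializeError(serializeError(original));
console.log(roundTripped.code, roundTripped.meta); // 'tokenLimitReachedError' { tokenLimit: 8192, tokenCount: 11036 }
```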
x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.ts (26 additions & 5 deletions)

```diff
@@ -13,6 +13,7 @@ import { MessageRole, type Message } from '../../common';
 import {
   ConversationCreateEvent,
   ConversationUpdateEvent,
+  isTokenLimitReachedError,
   StreamingChatResponseEventType,
 } from '../../common/conversation_complete';
 import { getAssistantSetupMessage } from '../service/get_assistant_setup_message';
@@ -95,16 +96,36 @@ export function useChat({
 
   const handleError = useCallback(
     (error: Error) => {
-      notifications.toasts.addError(error, {
-        title: i18n.translate('xpack.observabilityAiAssistant.failedToLoadResponse', {
-          defaultMessage: 'Failed to load response from the AI Assistant',
-        }),
-      });
       if (error instanceof AbortError) {
         setChatState(ChatState.Aborted);
       } else {
         setChatState(ChatState.Error);
       }
+
+      if (isTokenLimitReachedError(error)) {
+        setMessages((msgs) => [
+          ...msgs,
+          {
+            '@timestamp': new Date().toISOString(),
+            message: {
+              content: i18n.translate('xpack.observabilityAiAssistant.tokenLimitError', {
+                defaultMessage:
+                  'The conversation has exceeded the token limit. The maximum token limit is **{tokenLimit}**, but the current conversation has **{tokenCount}** tokens. Please start a new conversation to continue.',
+                values: { tokenLimit: error.meta?.tokenLimit, tokenCount: error.meta?.tokenCount },
+              }),
+              role: MessageRole.Assistant,
+            },
+          },
+        ]);
+
+        return;
+      }
+
+      notifications.toasts.addError(error, {
+        title: i18n.translate('xpack.observabilityAiAssistant.failedToLoadResponse', {
+          defaultMessage: 'Failed to load response from the AI Assistant',
+        }),
+      });
     },
     [notifications.toasts]
   );
```
```diff
@@ -22,6 +22,7 @@ import {
   createConversationNotFoundError,
   MessageAddEvent,
   StreamingChatResponseEventType,
+  createTokenLimitReachedError,
   type StreamingChatResponseEvent,
 } from '../../../common/conversation_complete';
 import {
@@ -459,6 +460,17 @@ export class ObservabilityAIAssistantClient {
           },
         });
 
+        if (executeResult.status === 'error' && executeResult?.serviceMessage) {
+          const tokenLimitRegex =
+            /This model's maximum context length is (\d+) tokens\. However, your messages resulted in (\d+) tokens/g;
+          const tokenLimitRegexResult = tokenLimitRegex.exec(executeResult.serviceMessage);
+
+          if (tokenLimitRegexResult) {
+            const [, tokenLimit, tokenCount] = tokenLimitRegexResult;
+            throw createTokenLimitReachedError(parseInt(tokenLimit, 10), parseInt(tokenCount, 10));
+          }
+        }
+
         if (executeResult.status === 'error') {
           throw internal(`${executeResult?.message} - ${executeResult?.serviceMessage}`);
         }
```
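
The token numbers come from parsing the connector's `serviceMessage`. A small self-contained sketch of that extraction, using the OpenAI-style wording the regex above targets; the message text in the usage example is taken from the functional test below.

```ts
// Extract token limit and token count from an OpenAI-style context-length error.
// Returns undefined when the message does not match, so other errors fall
// through to the generic internal-error handling.
const TOKEN_LIMIT_REGEX =
  /This model's maximum context length is (\d+) tokens\. However, your messages resulted in (\d+) tokens/;

function parseTokenLimitError(
  serviceMessage: string
): { tokenLimit: number; tokenCount: number } | undefined {
  const match = TOKEN_LIMIT_REGEX.exec(serviceMessage);
  if (!match) {
    return undefined;
  }
  const [, tokenLimit, tokenCount] = match;
  return { tokenLimit: parseInt(tokenLimit, 10), tokenCount: parseInt(tokenCount, 10) };
}

// Example message taken from the functional test below.
console.log(
  parseTokenLimitError(
    "This model's maximum context length is 8192 tokens. However, your messages resulted in 11036 tokens. Please reduce the length of the messages."
  )
); // { tokenLimit: 8192, tokenCount: 11036 }
```

Unlike the diff above, this sketch drops the `g` flag so the shared regex does not carry `lastIndex` state between calls.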
```diff
@@ -29,6 +29,7 @@ export function observableIntoStream(
           message: error.message,
           stack: error.stack,
           code: isChatCompletionError(error) ? error.code : undefined,
+          meta: error.meta,
         },
         type: StreamingChatResponseEventType.ChatCompletionError,
       };
```
```diff
@@ -170,8 +170,8 @@ export default function ApiTest({ getService }: FtrProviderContext) {
 
       const response = JSON.parse(data);
 
-      expect(response.message).to.contain(
-        `an error occurred while running the action - Status code: 400. Message: API Error: Bad Request - This model's maximum context length is 8192 tokens. However, your messages resulted in 11036 tokens. Please reduce the length of the messages.`
+      expect(response.message).to.be(
+        `Token limit reached. Token limit is 8192, but the current conversation has 11036 tokens.`
       );
     });
 
```
