From 2f27bad0d63f678502c9d3e9674c89ca00e276b7 Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:18:49 +1100 Subject: [PATCH] [8.x] [Security Assistant] Knowledge base switch to use `semantic_text` (#197007) (#198437) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Backport This will backport the following commits from `main` to `8.x`: - [[Security Assistant] Knowledge base switch to use `semantic_text` (#197007)](https://github.com/elastic/kibana/pull/197007) ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) Co-authored-by: Patryk Kopyciński --- .../src/field_maps/types.ts | 2 + .../knowledge_base/crud_kb_route.gen.ts | 1 + .../knowledge_base/crud_kb_route.schema.yaml | 2 + .../entries/use_knowledge_base_entries.ts | 3 + .../use_knowledge_base_status.test.tsx | 1 + .../use_knowledge_base_status.tsx | 20 +- .../knowledge_base_settings.tsx | 4 +- .../index.tsx | 14 +- .../index_entry_editor.tsx | 2 +- .../translations.ts | 7 + .../use_knowledge_base_table.tsx | 36 ++- .../setup_knowledge_base_button.tsx | 22 +- .../server/__mocks__/msearch_query.ts | 10 +- .../server/__mocks__/vector_search_query.ts | 10 +- .../anonymization_fields/helpers.ts | 14 +- .../conversations/helpers.ts | 14 +- .../conversations/update_conversation.ts | 2 +- .../create_knowledge_base_entry.ts | 54 +--- .../field_maps_configuration.ts | 8 + .../knowledge_base/helpers.ts | 43 ++- .../knowledge_base/index.ts | 266 ++++++++++++++---- .../knowledge_base/ingest_pipeline.ts | 39 ++- .../knowledge_base/types.ts | 3 + .../prompts/helpers.ts | 14 +- .../server/ai_assistant_service/helpers.ts | 3 + .../server/ai_assistant_service/index.ts | 14 +- .../lib/data_stream/documents_data_writer.ts | 17 +- .../content_loaders/security_labs_loader.ts | 33 ++- .../server/routes/knowledge_base/constants.ts | 1 + .../get_knowledge_base_status.test.ts | 2 + .../get_knowledge_base_status.ts | 11 +- .../plugins/elastic_assistant/tsconfig.json | 3 +- .../configs/ess.config.ts | 5 + 33 files changed, 485 insertions(+), 195 deletions(-) diff --git a/packages/kbn-data-stream-adapter/src/field_maps/types.ts b/packages/kbn-data-stream-adapter/src/field_maps/types.ts index 62f4c7c600036..1cdafc7c61809 100644 --- a/packages/kbn-data-stream-adapter/src/field_maps/types.ts +++ b/packages/kbn-data-stream-adapter/src/field_maps/types.ts @@ -54,6 +54,8 @@ export type FieldMap = Record< scaling_factor?: number; dynamic?: boolean | 'strict'; properties?: Record; + inference_id?: string; + copy_to?: string; } >; diff --git a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts index fd599f5798cdc..aad215021da81 100644 --- a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts +++ b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.gen.ts @@ -81,4 +81,5 @@ export const ReadKnowledgeBaseResponse = z.object({ is_setup_in_progress: z.boolean().optional(), pipeline_exists: z.boolean().optional(), security_labs_exists: z.boolean().optional(), + user_data_exists: z.boolean().optional(), }); diff --git a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml index a61e98602ab40..0e0f1e9267916 100644 --- a/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml +++ b/x-pack/packages/kbn-elastic-assistant-common/impl/schemas/knowledge_base/crud_kb_route.schema.yaml @@ -78,6 +78,8 @@ paths: type: boolean security_labs_exists: type: boolean + user_data_exists: + type: boolean 400: description: Generic Error content: diff --git a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts index b41119779b21d..0775ed2d27a36 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts +++ b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/entries/use_knowledge_base_entries.ts @@ -24,6 +24,7 @@ export interface UseKnowledgeBaseEntriesParams { signal?: AbortSignal | undefined; toasts?: IToasts; enabled?: boolean; // For disabling if FF is off + isRefetching?: boolean; // For enabling polling } const defaultQuery: FindKnowledgeBaseEntriesRequestQuery = { @@ -56,6 +57,7 @@ export const useKnowledgeBaseEntries = ({ signal, toasts, enabled = false, + isRefetching = false, }: UseKnowledgeBaseEntriesParams) => useQuery( KNOWLEDGE_BASE_ENTRY_QUERY_KEY, @@ -73,6 +75,7 @@ export const useKnowledgeBaseEntries = ({ enabled, keepPreviousData: true, initialData: { page: 1, perPage: 100, total: 0, data: [] }, + refetchInterval: isRefetching ? 30000 : false, onError: (error: IHttpFetchError) => { if (error.name !== 'AbortError') { toasts?.addError(error, { diff --git a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx index 80ce3d27d8dcb..83073b5770ba0 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.test.tsx @@ -34,6 +34,7 @@ const statusResponse = { elser_exists: true, index_exists: true, pipeline_exists: true, + security_labs_exists: true, }; const http = { diff --git a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx index 75e78f2a06948..45c6d011b46d4 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/assistant/api/knowledge_base/use_knowledge_base_status.tsx @@ -45,6 +45,8 @@ export const useKnowledgeBaseStatus = ({ { retry: false, keepPreviousData: true, + // Polling interval for Knowledge Base setup in progress + refetchInterval: (data) => (data?.is_setup_in_progress ? 30000 : false), // Deprecated, hoist to `queryCache` w/in `QueryClient. See: https://stackoverflow.com/a/76961109 onError: (error: IHttpFetchError) => { if (error.name !== 'AbortError') { @@ -86,12 +88,12 @@ export const useInvalidateKnowledgeBaseStatus = () => { * * @param kbStatus ReadKnowledgeBaseResponse */ -export const isKnowledgeBaseSetup = (kbStatus: ReadKnowledgeBaseResponse | undefined): boolean => { - return ( - (kbStatus?.elser_exists && - kbStatus?.security_labs_exists && - kbStatus?.index_exists && - kbStatus?.pipeline_exists) ?? - false - ); -}; +export const isKnowledgeBaseSetup = (kbStatus: ReadKnowledgeBaseResponse | undefined): boolean => + (kbStatus?.elser_exists && + kbStatus?.index_exists && + kbStatus?.pipeline_exists && + // Allows to use UI while importing Security Labs docs + (kbStatus?.security_labs_exists || + kbStatus?.is_setup_in_progress || + kbStatus?.user_data_exists)) ?? + false; diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx index a46ba652574f6..7041bf909601f 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings.tsx @@ -53,9 +53,9 @@ export const KnowledgeBaseSettings: React.FC = React.memo( const isSecurityLabsEnabled = kbStatus?.security_labs_exists ?? false; const isKnowledgeBaseSetup = (isElserEnabled && - isSecurityLabsEnabled && kbStatus?.index_exists && - kbStatus?.pipeline_exists) ?? + kbStatus?.pipeline_exists && + (isSecurityLabsEnabled || kbStatus?.user_data_exists)) ?? false; const isSetupInProgress = kbStatus?.is_setup_in_progress ?? false; const isSetupAvailable = kbStatus?.is_setup_available ?? false; diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx index 54ea159ff0589..bc2d60941679a 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index.tsx @@ -160,6 +160,7 @@ export const KnowledgeBaseSettingsManagement: React.FC = React.memo(({ d http, toasts, enabled: enableKnowledgeBaseByDefault, + isRefetching: kbStatus?.is_setup_in_progress, }); // Flyout Save/Cancel Actions @@ -190,13 +191,15 @@ export const KnowledgeBaseSettingsManagement: React.FC = React.memo(({ d indices.push(entry.index); } }); - return dataViews.getExistingIndices(indices); + + return indices.length ? dataViews.getExistingIndices(indices) : Promise.resolve([]); }, [entries.data]); const { getColumns } = useKnowledgeBaseTable(); const columns = useMemo( () => getColumns({ + isKbSetupInProgress: kbStatus?.is_setup_in_progress ?? false, existingIndices, isDeleteEnabled: (entry: KnowledgeBaseEntryResponse) => { return ( @@ -219,7 +222,14 @@ export const KnowledgeBaseSettingsManagement: React.FC = React.memo(({ d openFlyout(); }, }), - [entries.data, existingIndices, getColumns, hasManageGlobalKnowledgeBase, openFlyout] + [ + entries.data, + existingIndices, + getColumns, + hasManageGlobalKnowledgeBase, + kbStatus?.is_setup_in_progress, + openFlyout, + ] ); // Refresh button diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx index ff61c61ed7423..dfc3cd0086686 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/index_entry_editor.tsx @@ -117,7 +117,7 @@ export const IndexEntryEditor: React.FC = React.memo( dataViews.getFieldsForWildcard({ pattern: entry?.index ?? '', }), - [] + [entry?.index] ); const fieldOptions = useMemo( diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts index b311f373c214b..98af0eabea6b5 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/translations.ts @@ -372,3 +372,10 @@ export const MISSING_INDEX_TOOLTIP_CONTENT = i18n.translate( 'The index assigned to this knowledge base entry is unavailable. Check the permissions on the configured index, or that the index has not been deleted. You can update the index to be used for this knowledge entry, or delete the entry entirely.', } ); + +export const SECURITY_LABS_NOT_FULLY_LOADED = i18n.translate( + 'xpack.elasticAssistant.assistant.settings.knowledgeBaseSettingsManagement.securityLabsNotFullyLoadedTooltipContent', + { + defaultMessage: 'Security Labs content is not fully loaded. Click to reload.', + } +); diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx index 7180be139c286..cbdf97f116f7b 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/knowledge_base_settings_management/use_knowledge_base_table.tsx @@ -11,6 +11,7 @@ import { EuiBasicTableColumn, EuiIcon, EuiText, + EuiLoadingSpinner, EuiToolTip, } from '@elastic/eui'; import { css } from '@emotion/react'; @@ -29,11 +30,16 @@ import * as i18n from './translations'; import { BadgesColumn } from '../../assistant/common/components/assistant_settings_management/badges'; import { useInlineActions } from '../../assistant/common/components/assistant_settings_management/inline_actions'; import { isSystemEntry } from './helpers'; +import { SetupKnowledgeBaseButton } from '../setup_knowledge_base_button'; const AuthorColumn = ({ entry }: { entry: KnowledgeBaseEntryResponse }) => { const { userProfileService } = useAssistantContext(); const userProfile = useAsync(async () => { + if (isSystemEntry(entry) || entry.createdBy === 'unknown') { + return; + } + const profile = await userProfileService?.bulkGet<{ avatar: UserProfileAvatarData }>({ uids: new Set([entry.createdBy]), dataPath: 'avatar', @@ -45,7 +51,7 @@ const AuthorColumn = ({ entry }: { entry: KnowledgeBaseEntryResponse }) => { () => userProfile?.value?.username ?? 'Unknown', [userProfile?.value?.username] ); - const userAvatar = userProfile.value?.avatar; + const userAvatar = userProfile?.value?.avatar; const badgeItem = isSystemEntry(entry) ? 'Elastic' : userName; const userImage = isSystemEntry(entry) ? ( { isEditEnabled, onDeleteActionClicked, onEditActionClicked, + isKbSetupInProgress, }: { existingIndices?: string[]; isDeleteEnabled: (entry: KnowledgeBaseEntryResponse) => boolean; isEditEnabled: (entry: KnowledgeBaseEntryResponse) => boolean; onDeleteActionClicked: (entry: KnowledgeBaseEntryResponse) => void; onEditActionClicked: (entry: KnowledgeBaseEntryResponse) => void; + isKbSetupInProgress: boolean; }): Array> => { return [ { @@ -180,11 +188,27 @@ export const useKnowledgeBaseTable = () => { { name: i18n.COLUMN_ENTRIES, render: (entry: KnowledgeBaseEntryResponse) => { - return isSystemEntry(entry) - ? entry.text - : entry.type === DocumentEntryType.value - ? '1' - : '-'; + return isSystemEntry(entry) ? ( + <> + {`${entry.text}`} + {isKbSetupInProgress ? ( + + ) : ( + + + + )} + + ) : entry.type === DocumentEntryType.value ? ( + '1' + ) : ( + '-' + ); }, }, { diff --git a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx index d697fc7120d01..948e45232028c 100644 --- a/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx +++ b/x-pack/packages/kbn-elastic-assistant/impl/knowledge_base/setup_knowledge_base_button.tsx @@ -6,15 +6,16 @@ */ import React, { useCallback } from 'react'; -import { EuiButton, EuiButtonEmpty, EuiToolTip } from '@elastic/eui'; +import { EuiButton, EuiButtonIcon, EuiButtonEmpty, EuiToolTip } from '@elastic/eui'; import { i18n } from '@kbn/i18n'; +import { css } from '@emotion/react'; import { useAssistantContext } from '../..'; import { useSetupKnowledgeBase } from '../assistant/api/knowledge_base/use_setup_knowledge_base'; import { useKnowledgeBaseStatus } from '../assistant/api/knowledge_base/use_knowledge_base_status'; interface Props { - display?: 'mini'; + display?: 'mini' | 'refresh'; } /** @@ -48,6 +49,23 @@ export const SetupKnowledgeBaseButton: React.FC = React.memo(({ display } }) : undefined; + if (display === 'refresh') { + return ( + + ); + } + return ( {display === 'mini' ? ( diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts index e411dfaa2f1ef..ae5adcfab61aa 100644 --- a/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts @@ -34,12 +34,10 @@ export const mSearchQueryBody: MsearchQueryBody = { ], must: [ { - text_expansion: { - 'vector.tokens': { - model_id: '.elser_model_2', - model_text: - 'Generate an ESQL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', - }, + semantic: { + field: 'semantic_text', + query: + 'Generate an ESQL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', }, }, ], diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts index 30fbd0ad2c58f..04263c5d242bb 100644 --- a/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts @@ -26,12 +26,10 @@ export const mockVectorSearchQuery: QueryDslQueryContainer = { ], must: [ { - text_expansion: { - 'vector.tokens': { - model_id: '.elser_model_2', - model_text: - 'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', - }, + semantic: { + field: 'semantic_text', + query: + 'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', }, }, ], diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts index 9a4a3b6e1c0ce..0f577df4e56e1 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/anonymization_fields/helpers.ts @@ -99,7 +99,8 @@ export const getUpdateScript = ({ isPatch?: boolean; }) => { return { - source: ` + script: { + source: ` if (params.assignEmpty == true || params.containsKey('allowed')) { ctx._source.allowed = params.allowed; } @@ -108,11 +109,12 @@ export const getUpdateScript = ({ } ctx._source.updated_at = params.updated_at; `, - lang: 'painless', - params: { - ...anonymizationField, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + lang: 'painless', + params: { + ...anonymizationField, // when assigning undefined in painless, it will remove property and wil set it to null + // for patch we don't want to remove unspecified value in payload + assignEmpty: !(isPatch ?? true), + }, }, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts index 9e52b4a7414a6..bdd1107942cc1 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/helpers.ts @@ -15,7 +15,8 @@ export const getUpdateScript = ({ isPatch?: boolean; }) => { return { - source: ` + script: { + source: ` if (params.assignEmpty == true || params.containsKey('api_config')) { if (ctx._source.api_config != null) { if (params.assignEmpty == true || params.api_config.containsKey('connector_id')) { @@ -70,11 +71,12 @@ export const getUpdateScript = ({ } ctx._source.updated_at = params.updated_at; `, - lang: 'painless', - params: { - ...conversation, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + lang: 'painless', + params: { + ...conversation, // when assigning undefined in painless, it will remove property and wil set it to null + // for patch we don't want to remove unspecified value in payload + assignEmpty: !(isPatch ?? true), + }, }, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts index 807fea2decd99..7e9ee336f6fe1 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/conversations/update_conversation.ts @@ -76,7 +76,7 @@ export const updateConversation = async ({ }, }, refresh: true, - script: getUpdateScript({ conversation: params, isPatch }), + script: getUpdateScript({ conversation: params, isPatch }).script, }); if (response.failures && response.failures.length > 0) { diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts index 23f73501b1056..09bb5b291ef9a 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/create_knowledge_base_entry.ts @@ -139,55 +139,11 @@ export const getUpdateScript = ({ entry: UpdateKnowledgeBaseEntrySchema; isPatch?: boolean; }) => { + // Cannot use script for updating documents with semantic_text fields return { - source: ` - if (params.assignEmpty == true || params.containsKey('name')) { - ctx._source.name = params.name; - } - if (params.assignEmpty == true || params.containsKey('type')) { - ctx._source.type = params.type; - } - if (params.assignEmpty == true || params.containsKey('users')) { - ctx._source.users = params.users; - } - if (params.assignEmpty == true || params.containsKey('query_description')) { - ctx._source.query_description = params.query_description; - } - if (params.assignEmpty == true || params.containsKey('input_schema')) { - ctx._source.input_schema = params.input_schema; - } - if (params.assignEmpty == true || params.containsKey('output_fields')) { - ctx._source.output_fields = params.output_fields; - } - if (params.assignEmpty == true || params.containsKey('kb_resource')) { - ctx._source.kb_resource = params.kb_resource; - } - if (params.assignEmpty == true || params.containsKey('required')) { - ctx._source.required = params.required; - } - if (params.assignEmpty == true || params.containsKey('source')) { - ctx._source.source = params.source; - } - if (params.assignEmpty == true || params.containsKey('text')) { - ctx._source.text = params.text; - } - if (params.assignEmpty == true || params.containsKey('description')) { - ctx._source.description = params.description; - } - if (params.assignEmpty == true || params.containsKey('field')) { - ctx._source.field = params.field; - } - if (params.assignEmpty == true || params.containsKey('index')) { - ctx._source.index = params.index; - } - ctx._source.updated_at = params.updated_at; - ctx._source.updated_by = params.updated_by; - `, - lang: 'painless', - params: { - ...entry, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + doc: { + ...entry, + semantic_text: entry.text, }, }; }; @@ -247,7 +203,7 @@ export const transformToCreateSchema = ({ required: entry.required ?? false, source: entry.source, text: entry.text, - vector: undefined, + semantic_text: entry.text, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts index 0712664bbfeed..348efb5a18f7d 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/field_maps_configuration.ts @@ -6,6 +6,8 @@ */ import { FieldMap } from '@kbn/data-stream-adapter'; +export const ASSISTANT_ELSER_INFERENCE_ID = 'elastic-security-ai-assistant-elser2'; + export const knowledgeBaseFieldMap: FieldMap = { '@timestamp': { type: 'date', @@ -169,6 +171,12 @@ export const knowledgeBaseFieldMapV2: FieldMap = { required: false, }, // Embeddings field + semantic_text: { + type: 'semantic_text', + array: false, + required: false, + inference_id: ASSISTANT_ELSER_INFERENCE_ID, + }, vector: { type: 'object', array: false, diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts index 59816b0b0c264..a19b3f0945086 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts @@ -46,7 +46,7 @@ export const getKBVectorSearchQuery = ({ filter?: QueryDslQueryContainer | undefined; kbResource?: string | undefined; modelId: string; - query: string; + query?: string; required?: boolean | undefined; user: AuthenticatedUser; v2KnowledgeBaseEnabled: boolean; @@ -114,20 +114,37 @@ export const getKBVectorSearchQuery = ({ ], }; - return { - bool: { - must: [ - { - text_expansion: { - 'vector.tokens': { - model_id: modelId, - model_text: query, - }, + let semanticTextFilter: + | Array<{ semantic: { field: string; query: string } }> + | Array<{ + text_expansion: { 'vector.tokens': { model_id: string; model_text: string } }; + }> = []; + + if (v2KnowledgeBaseEnabled && query) { + semanticTextFilter = [ + { + semantic: { + field: 'semantic_text', + query, + }, + }, + ]; + } else if (!v2KnowledgeBaseEnabled) { + semanticTextFilter = [ + { + text_expansion: { + 'vector.tokens': { + model_id: modelId, + model_text: query as string, }, }, - ...requiredFilter, - ...resourceFilter, - ], + }, + ]; + } + + return { + bool: { + must: [...semanticTextFilter, ...requiredFilter, ...resourceFilter], ...userFilter, filter, minimum_should_match: 1, diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts index 64e7b00089c08..f985095661f3e 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts @@ -8,6 +8,7 @@ import { MlTrainedModelDeploymentNodesStats, MlTrainedModelStats, + SearchTotalHits, } from '@elastic/elasticsearch/lib/api/types'; import type { MlPluginSetup } from '@kbn/ml-plugin/server'; import type { KibanaRequest } from '@kbn/core-http-server'; @@ -25,6 +26,8 @@ import pRetry from 'p-retry'; import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { StructuredTool } from '@langchain/core/tools'; import { ElasticsearchClient } from '@kbn/core/server'; +import { IndexPatternsFetcher } from '@kbn/data-views-plugin/server'; +import { map } from 'lodash'; import { AIAssistantDataClient, AIAssistantDataClientParams } from '..'; import { AssistantToolParams, GetElser } from '../../types'; import { @@ -38,6 +41,7 @@ import { transformESSearchToKnowledgeBaseEntry } from './transforms'; import { ESQL_DOCS_LOADED_QUERY, SECURITY_LABS_RESOURCE, + USER_RESOURCE, } from '../../routes/knowledge_base/constants'; import { getKBVectorSearchQuery, @@ -45,7 +49,11 @@ import { isModelAlreadyExistsError, } from './helpers'; import { getKBUserFilter } from '../../routes/knowledge_base/entries/utils'; -import { loadSecurityLabs } from '../../lib/langchain/content_loaders/security_labs_loader'; +import { + loadSecurityLabs, + getSecurityLabsDocsCount, +} from '../../lib/langchain/content_loaders/security_labs_loader'; +import { ASSISTANT_ELSER_INFERENCE_ID } from './field_maps_configuration'; /** * Params for when creating KbDataClient in Request Context Factory. Useful if needing to modify @@ -169,30 +177,83 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { this.options.logger.debug(`Checking if ELSER model '${elserId}' is deployed...`); try { - const esClient = await this.options.elasticsearchClientPromise; - const getResponse = await esClient.ml.getTrainedModelsStats({ - model_id: elserId, - }); + if (this.isV2KnowledgeBaseEnabled) { + return await this.isInferenceEndpointExists(); + } else { + const esClient = await this.options.elasticsearchClientPromise; + const getResponse = await esClient.ml.getTrainedModelsStats({ + model_id: elserId, + }); - // For standardized way of checking deployment status see: https://github.com/elastic/elasticsearch/issues/106986 - const isReadyESS = (stats: MlTrainedModelStats) => - stats.deployment_stats?.state === 'started' && - stats.deployment_stats?.allocation_status.state === 'fully_allocated'; + // For standardized way of checking deployment status see: https://github.com/elastic/elasticsearch/issues/106986 + const isReadyESS = (stats: MlTrainedModelStats) => + stats.deployment_stats?.state === 'started' && + stats.deployment_stats?.allocation_status.state === 'fully_allocated'; - const isReadyServerless = (stats: MlTrainedModelStats) => - (stats.deployment_stats?.nodes as unknown as MlTrainedModelDeploymentNodesStats[]).some( - (node) => node.routing_state.routing_state === 'started' - ); + const isReadyServerless = (stats: MlTrainedModelStats) => + (stats.deployment_stats?.nodes as unknown as MlTrainedModelDeploymentNodesStats[])?.some( + (node) => node.routing_state.routing_state === 'started' + ); - return getResponse.trained_model_stats.some( - (stats) => isReadyESS(stats) || isReadyServerless(stats) - ); + return getResponse.trained_model_stats?.some( + (stats) => isReadyESS(stats) || isReadyServerless(stats) + ); + } } catch (e) { + this.options.logger.debug(`Error checking if ELSER model '${elserId}' is deployed: ${e}`); // Returns 404 if it doesn't exist return false; } }; + public isInferenceEndpointExists = async (): Promise => { + try { + const esClient = await this.options.elasticsearchClientPromise; + + return !!(await esClient.inference.get({ + inference_id: ASSISTANT_ELSER_INFERENCE_ID, + task_type: 'sparse_embedding', + })); + } catch (error) { + this.options.logger.debug( + `Error checking if Inference endpoint ${ASSISTANT_ELSER_INFERENCE_ID} exists: ${error}` + ); + return false; + } + }; + + public createInferenceEndpoint = async () => { + const elserId = await this.options.getElserId(); + this.options.logger.debug(`Deploying ELSER model '${elserId}'...`); + try { + const esClient = await this.options.elasticsearchClientPromise; + if (this.isV2KnowledgeBaseEnabled) { + await esClient.inference.put({ + task_type: 'sparse_embedding', + inference_id: ASSISTANT_ELSER_INFERENCE_ID, + inference_config: { + service: 'elasticsearch', + service_settings: { + adaptive_allocations: { + enabled: true, + min_number_of_allocations: 0, + max_number_of_allocations: 8, + }, + num_threads: 1, + model_id: elserId, + }, + task_settings: {}, + }, + }); + } + } catch (error) { + this.options.logger.error( + `Error creating inference endpoint for ELSER model '${elserId}':\n${error}` + ); + throw new Error(`Error creating inference endpoint for ELSER model '${elserId}':\n${error}`); + } + }; + /** * Downloads and deploys recommended ELSER (if not already), then loads ES|QL docs * @@ -238,8 +299,22 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { `Removed ${legacyESQL?.total} ESQL knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.` ); } + // Delete any existing Security Labs content + const securityLabsDocs = await esClient.deleteByQuery({ + index: this.indexTemplateAndPattern.alias, + query: { + bool: { + must: [{ terms: { kb_resource: [SECURITY_LABS_RESOURCE] } }], + }, + }, + }); + if (securityLabsDocs?.total) { + this.options.logger.info( + `Removed ${securityLabsDocs?.total} Security Labs knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.` + ); + } } catch (e) { - this.options.logger.info('No legacy ESQL knowledge base docs to delete'); + this.options.logger.info('No legacy ESQL or Security Labs knowledge base docs to delete'); } } @@ -259,19 +334,34 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { this.options.logger.debug(`ELSER model '${elserId}' is already installed`); } - const isDeployed = await this.isModelDeployed(); - if (!isDeployed) { - await this.deployModel(); - await pRetry( - async () => - (await this.isModelDeployed()) - ? Promise.resolve() - : Promise.reject(new Error('Model not deployed')), - { minTimeout: 2000, retries: 10 } - ); - this.options.logger.debug(`ELSER model '${elserId}' successfully deployed!`); + if (!this.isV2KnowledgeBaseEnabled) { + const isDeployed = await this.isModelDeployed(); + if (!isDeployed) { + await this.deployModel(); + await pRetry( + async () => + (await this.isModelDeployed()) + ? Promise.resolve() + : Promise.reject(new Error('Model not deployed')), + { minTimeout: 2000, retries: 10 } + ); + this.options.logger.debug(`ELSER model '${elserId}' successfully deployed!`); + } else { + this.options.logger.debug(`ELSER model '${elserId}' is already deployed`); + } } else { - this.options.logger.debug(`ELSER model '${elserId}' is already deployed`); + const inferenceExists = await this.isInferenceEndpointExists(); + if (!inferenceExists) { + await this.createInferenceEndpoint(); + + this.options.logger.debug( + `Inference endpoint for ELSER model '${elserId}' successfully deployed!` + ); + } else { + this.options.logger.debug( + `Inference endpoint for ELSER model '${elserId}' is already deployed` + ); + } } this.options.logger.debug(`Checking if Knowledge Base docs have been loaded...`); @@ -289,8 +379,9 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { this.options.setIsKBSetupInProgress(false); this.options.logger.error(`Error setting up Knowledge Base: ${e.message}`); throw new Error(`Error setting up Knowledge Base: ${e.message}`); + } finally { + this.options.setIsKBSetupInProgress(false); } - this.options.setIsKBSetupInProgress(false); }; /** @@ -385,15 +476,87 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { }; /** - * Returns if Security Labs KB docs have been loaded + * Returns if user's KB docs exists + */ + + public isUserDataExists = async (): Promise => { + const user = this.options.currentUser; + if (user == null) { + throw new Error( + 'Authenticated user not found! Ensure kbDataClient was initialized from a request.' + ); + } + + const esClient = await this.options.elasticsearchClientPromise; + const modelId = await this.options.getElserId(); + + try { + const vectorSearchQuery = getKBVectorSearchQuery({ + kbResource: USER_RESOURCE, + required: false, + user, + modelId, + v2KnowledgeBaseEnabled: this.options.v2KnowledgeBaseEnabled, + }); + + const result = await esClient.search({ + index: this.indexTemplateAndPattern.alias, + size: 0, + query: vectorSearchQuery, + track_total_hits: true, + }); + + return !!(result.hits?.total as SearchTotalHits).value; + } catch (e) { + this.options.logger.debug(`Error checking if user's KB docs exist: ${e.message}`); + return false; + } + }; + + /** + * Returns if allSecurity Labs KB docs have been loaded */ public isSecurityLabsDocsLoaded = async (): Promise => { - const securityLabsDocs = await this.getKnowledgeBaseDocumentEntries({ - query: '', - kbResource: SECURITY_LABS_RESOURCE, - required: false, - }); - return securityLabsDocs.length > 0; + const user = this.options.currentUser; + if (user == null) { + throw new Error( + 'Authenticated user not found! Ensure kbDataClient was initialized from a request.' + ); + } + + const expectedDocsCount = await getSecurityLabsDocsCount({ logger: this.options.logger }); + + const esClient = await this.options.elasticsearchClientPromise; + const modelId = await this.options.getElserId(); + + try { + const vectorSearchQuery = getKBVectorSearchQuery({ + kbResource: SECURITY_LABS_RESOURCE, + required: false, + user, + modelId, + v2KnowledgeBaseEnabled: this.options.v2KnowledgeBaseEnabled, + }); + + const result = await esClient.search({ + index: this.indexTemplateAndPattern.alias, + size: 0, + query: vectorSearchQuery, + track_total_hits: true, + }); + + const existingDocs = (result.hits?.total as SearchTotalHits).value; + + if (existingDocs !== expectedDocsCount) { + this.options.logger.debug( + `Security Labs docs are not loaded, existing docs: ${existingDocs}, expected docs: ${expectedDocsCount}` + ); + } + return existingDocs === expectedDocsCount; + } catch (e) { + this.options.logger.info(`Error checking if Security Labs docs are loaded: ${e.message}`); + return false; + } }; /** @@ -423,10 +586,10 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { const vectorSearchQuery = getKBVectorSearchQuery({ filter, kbResource, - modelId, query, required, user, + modelId, v2KnowledgeBaseEnabled: this.options.v2KnowledgeBaseEnabled, }); @@ -576,7 +739,9 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { } try { - const elserId = await this.options.getElserId(); + const elserId = this.isV2KnowledgeBaseEnabled + ? ASSISTANT_ELSER_INFERENCE_ID + : await this.options.getElserId(); const userFilter = getKBUserFilter(user); const results = await this.findDocuments({ // Note: This is a magic number to set some upward bound as to not blow the context with too @@ -595,14 +760,21 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { if (results) { const entries = transformESSearchToKnowledgeBaseEntry(results.data) as IndexEntry[]; - return entries.map((indexEntry) => { - return getStructuredToolForIndexEntry({ - indexEntry, - esClient, - logger: this.options.logger, - elserId, - }); - }); + const indexPatternFetcher = new IndexPatternsFetcher(esClient); + const existingIndices = await indexPatternFetcher.getExistingIndices(map(entries, 'index')); + return ( + entries + // Filter out any IndexEntries that don't have an existing index + .filter((entry) => existingIndices.includes(entry.index)) + .map((indexEntry) => { + return getStructuredToolForIndexEntry({ + indexEntry, + esClient, + logger: this.options.logger, + elserId, + }); + }) + ); } } catch (e) { this.options.logger.error(`kbDataClient.getAssistantTools() - Failed to fetch IndexEntries`); diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts index e11840b94e660..8f459848af420 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/ingest_pipeline.ts @@ -5,22 +5,31 @@ * 2.0. */ -// TODO: Ensure old pipeline is updated/replaced -export const knowledgeBaseIngestPipeline = ({ id, modelId }: { id: string; modelId: string }) => ({ +export const knowledgeBaseIngestPipeline = ({ + id, + modelId, + v2KnowledgeBaseEnabled, +}: { + id: string; + modelId: string; + v2KnowledgeBaseEnabled: boolean; +}) => ({ id, description: 'Embedding pipeline for Elastic AI Assistant ELSER Knowledge Base', - processors: [ - { - inference: { - if: 'ctx?.text != null', - model_id: modelId, - input_output: [ - { - input_field: 'text', - output_field: 'vector.tokens', + processors: !v2KnowledgeBaseEnabled + ? [ + { + inference: { + if: 'ctx?.text != null', + model_id: modelId, + input_output: [ + { + input_field: 'text', + output_field: 'vector.tokens', + }, + ], }, - ], - }, - }, - ], + }, + ] + : [], }); diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts index 3de1a15d79b2a..443d03941ccdd 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/types.ts @@ -27,6 +27,7 @@ export interface EsDocumentEntry { required: boolean; source: string; text: string; + semantic_text?: string; vector?: { tokens: Record; model_id: string; @@ -99,6 +100,7 @@ export interface UpdateKnowledgeBaseEntrySchema { required?: boolean; source?: string; text?: string; + semantic_text?: string; vector?: { tokens: Record; model_id: string; @@ -135,6 +137,7 @@ export interface CreateKnowledgeBaseEntrySchema { required?: boolean; source?: string; text?: string; + semantic_text?: string; vector?: { tokens: Record; model_id: string; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts index a4534972c8478..eb71270127b2a 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_data_clients/prompts/helpers.ts @@ -143,7 +143,8 @@ export const getUpdateScript = ({ isPatch?: boolean; }) => { return { - source: ` + script: { + source: ` if (params.assignEmpty == true || params.containsKey('content')) { ctx._source.content = params.content; } @@ -158,11 +159,12 @@ export const getUpdateScript = ({ } ctx._source.updated_at = params.updated_at; `, - lang: 'painless', - params: { - ...prompt, // when assigning undefined in painless, it will remove property and wil set it to null - // for patch we don't want to remove unspecified value in payload - assignEmpty: !(isPatch ?? true), + lang: 'painless', + params: { + ...prompt, // when assigning undefined in painless, it will remove property and wil set it to null + // for patch we don't want to remove unspecified value in payload + assignEmpty: !(isPatch ?? true), + }, }, }; }; diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts index 07da930320712..93338174364fc 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/helpers.ts @@ -54,6 +54,7 @@ interface CreatePipelineParams { esClient: ElasticsearchClient; id: string; modelId: string; + v2KnowledgeBaseEnabled: boolean; } /** @@ -70,12 +71,14 @@ export const createPipeline = async ({ esClient, id, modelId, + v2KnowledgeBaseEnabled, }: CreatePipelineParams): Promise => { try { const response = await esClient.ingest.putPipeline( knowledgeBaseIngestPipeline({ id, modelId, + v2KnowledgeBaseEnabled, }) ); diff --git a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts index bfdf8b96f44b0..a7b54dd5ca4be 100644 --- a/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts +++ b/x-pack/plugins/elastic_assistant/server/ai_assistant_service/index.ts @@ -97,7 +97,7 @@ export class AIAssistantService { this.knowledgeBaseDataStream = this.createDataStream({ resource: 'knowledgeBase', kibanaVersion: options.kibanaVersion, - fieldMap: knowledgeBaseFieldMap, // TODO: use V2 if FF is enabled + fieldMap: knowledgeBaseFieldMap, }); this.promptsDataStream = this.createDataStream({ resource: 'prompts', @@ -151,7 +151,9 @@ export class AIAssistantService { name: this.resourceNames.indexTemplate[resource], componentTemplateRefs: [this.resourceNames.componentTemplate[resource]], // Apply `default_pipeline` if pipeline exists for resource - ...(resource in this.resourceNames.pipelines + ...(resource in this.resourceNames.pipelines && + // Remove this param and initialization when the `assistantKnowledgeBaseByDefault` feature flag is removed + !(resource === 'knowledgeBase' && this.v2KnowledgeBaseEnabled) ? { template: { settings: { @@ -202,7 +204,12 @@ export class AIAssistantService { id: this.resourceNames.pipelines.knowledgeBase, }); // TODO: When FF is removed, ensure pipeline is re-created for those upgrading - if (!pipelineCreated || this.v2KnowledgeBaseEnabled) { + if ( + // Install for v1 + (!this.v2KnowledgeBaseEnabled && !pipelineCreated) || + // Upgrade from v1 to v2 + (pipelineCreated && this.v2KnowledgeBaseEnabled) + ) { this.options.logger.debug( `Installing ingest pipeline - ${this.resourceNames.pipelines.knowledgeBase}` ); @@ -210,6 +217,7 @@ export class AIAssistantService { esClient, id: this.resourceNames.pipelines.knowledgeBase, modelId: await this.getElserId(), + v2KnowledgeBaseEnabled: this.v2KnowledgeBaseEnabled, }); this.options.logger.debug(`Installed ingest pipeline: ${response}`); diff --git a/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts b/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts index 32b579fdeb71a..08892038a58b7 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/data_stream/documents_data_writer.ts @@ -34,7 +34,10 @@ interface BulkParams { documentsToCreate?: TCreateParams[]; documentsToUpdate?: TUpdateParams[]; documentsToDelete?: string[]; - getUpdateScript?: (document: TUpdateParams, updatedAt: string) => Script; + getUpdateScript?: ( + document: TUpdateParams, + updatedAt: string + ) => { script?: Script; doc?: TUpdateParams }; authenticatedUser?: AuthenticatedUser; } @@ -73,7 +76,7 @@ export class DocumentsDataWriter implements DocumentsDataWriter { body: await this.buildBulkOperations(params), }, { - // Increasing timout to 2min as KB docs were failing to load after 30s + // Increasing timeout to 2min as KB docs were failing to load after 30s requestTimeout: 120000, } ); @@ -151,7 +154,10 @@ export class DocumentsDataWriter implements DocumentsDataWriter { private getUpdateDocumentsQuery = async ( documentsToUpdate: TUpdateParams[], - getUpdateScript: (document: TUpdateParams, updatedAt: string) => Script, + getUpdateScript: ( + document: TUpdateParams, + updatedAt: string + ) => { script?: Script; doc?: TUpdateParams }, authenticatedUser?: AuthenticatedUser ) => { const updatedAt = new Date().toISOString(); @@ -196,10 +202,7 @@ export class DocumentsDataWriter implements DocumentsDataWriter { _source: true, }, }, - { - script: getUpdateScript(document, updatedAt), - upsert: { counter: 1 }, - }, + getUpdateScript(document, updatedAt), ]); }; diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts index 10566b3e5a1d5..f37e20df2bd98 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/security_labs_loader.ts @@ -5,13 +5,14 @@ * 2.0. */ +import globby from 'globby'; import { Logger } from '@kbn/core/server'; import { DirectoryLoader } from 'langchain/document_loaders/fs/directory'; import { TextLoader } from 'langchain/document_loaders/fs/text'; import { resolve } from 'path'; import { Document } from 'langchain/document'; import { Metadata } from '@kbn/elastic-assistant-common'; - +import pMap from 'p-map'; import { addRequiredKbResourceMetadata } from './add_required_kb_resource_metadata'; import { SECURITY_LABS_RESOURCE } from '../../../routes/knowledge_base/constants'; import { AIAssistantKnowledgeBaseDataClient } from '../../../ai_assistant_data_clients/knowledge_base'; @@ -42,10 +43,22 @@ export const loadSecurityLabs = async ( logger.info(`Loading ${docs.length} Security Labs docs into the Knowledge Base`); - const response = await kbDataClient.addKnowledgeBaseDocuments({ - documents: docs, - global: true, - }); + /** + * Ingest Security Labs docs into the Knowledge Base one by one to avoid blocking + * Inference Endpoint for too long + */ + + const response = ( + await pMap( + docs, + (singleDoc) => + kbDataClient.addKnowledgeBaseDocuments({ + documents: [singleDoc], + global: true, + }), + { concurrency: 1 } + ) + ).flat(); logger.info(`Loaded ${response?.length ?? 0} Security Labs docs into the Knowledge Base`); @@ -55,3 +68,13 @@ export const loadSecurityLabs = async ( return false; } }; + +export const getSecurityLabsDocsCount = async ({ logger }: { logger: Logger }): Promise => { + try { + return (await globby(`${resolve(__dirname, '../../../knowledge_base/security_labs')}/**/*.md`)) + ?.length; + } catch (e) { + logger.error(`Failed to get Security Labs source docs count\n${e}`); + return 0; + } +}; diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts index 89970611df0e9..8bf17027e751e 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/constants.ts @@ -12,3 +12,4 @@ export const KNOWLEDGE_BASE_INGEST_PIPELINE = '.kibana-elastic-ai-assistant-kb-i export const ESQL_DOCS_LOADED_QUERY = 'You can chain processing commands, separated by a pipe character: `|`.'; export const SECURITY_LABS_RESOURCE = 'security_labs'; +export const USER_RESOURCE = 'user'; diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts index 6244599a2af27..b30e5ac3653ad 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.test.ts @@ -38,6 +38,7 @@ describe('Get Knowledge Base Status Route', () => { isModelDeployed: jest.fn().mockResolvedValue(true), isSetupInProgress: false, isSecurityLabsDocsLoaded: jest.fn().mockResolvedValue(true), + isUserDataExists: jest.fn().mockResolvedValue(true), }); getKnowledgeBaseStatusRoute(server.router); @@ -58,6 +59,7 @@ describe('Get Knowledge Base Status Route', () => { is_setup_available: true, pipeline_exists: true, security_labs_exists: true, + user_data_exists: true, }); }); }); diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts index 833e674b68ffd..f278cd469ac0e 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts @@ -74,11 +74,18 @@ export const getKnowledgeBaseStatusRoute = (router: ElasticAssistantPluginRouter }; if (indexExists && isModelDeployed) { - const securityLabsExists = await kbDataClient.isSecurityLabsDocsLoaded(); + const securityLabsExists = v2KnowledgeBaseEnabled + ? await kbDataClient.isSecurityLabsDocsLoaded() + : true; + const userDataExists = v2KnowledgeBaseEnabled + ? await kbDataClient.isUserDataExists() + : true; + return response.ok({ body: { ...body, - security_labs_exists: v2KnowledgeBaseEnabled ? securityLabsExists : true, + security_labs_exists: securityLabsExists, + user_data_exists: userDataExists, }, }); } diff --git a/x-pack/plugins/elastic_assistant/tsconfig.json b/x-pack/plugins/elastic_assistant/tsconfig.json index 747a58ed930d3..d3436f28a1d3e 100644 --- a/x-pack/plugins/elastic_assistant/tsconfig.json +++ b/x-pack/plugins/elastic_assistant/tsconfig.json @@ -48,7 +48,8 @@ "@kbn/apm-utils", "@kbn/std", "@kbn/zod", - "@kbn/inference-plugin" + "@kbn/inference-plugin", + "@kbn/data-views-plugin" ], "exclude": [ "target/**/*", diff --git a/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts b/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts index 55860215ebfcc..7954db769a6d5 100644 --- a/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts +++ b/x-pack/test/security_solution_api_integration/test_suites/genai/knowledge_base/entries/trial_license_complete_tier/configs/ess.config.ts @@ -48,6 +48,11 @@ export default async function ({ readConfigFile }: FtrConfigProviderContext) { esTestCluster: { ...functionalConfig.get('esTestCluster'), ssl: false, + esJavaOpts: '-Xms4g -Xmx4g', + }, + mochaOpts: { + ...functionalConfig.get('mochaOpts'), + timeout: 360000 * 2, }, }; }