diff --git a/common/constants.ts b/common/constants.ts index 52d26775..05d911a8 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -91,6 +91,7 @@ export enum WORKFLOW_TYPE { MULTIMODAL_SEARCH = 'Multimodal search', HYBRID_SEARCH = 'Hybrid search', SENTIMENT_ANALYSIS = 'Sentiment analysis', + RAG = 'Retrieval-augmented generation', CUSTOM = 'Custom', UNKNOWN = 'Unknown', } @@ -103,6 +104,7 @@ export enum PROCESSOR_TYPE { SORT = 'sort', TEXT_CHUNKING = 'text_chunking', NORMALIZATION = 'normalization-processor', + COLLAPSE = 'collapse', } export enum MODEL_TYPE { @@ -180,12 +182,17 @@ export const DELIMITER_OPTIONAL_FIELDS = ['delimiter']; export const SHARED_OPTIONAL_FIELDS = ['max_chunk_limit', 'description', 'tag']; /** - * QUERY PRESETS + * DEFAULT FIELD VALUES */ export const DEFAULT_TEXT_FIELD = 'my_text'; export const DEFAULT_VECTOR_FIELD = 'my_embedding'; export const DEFAULT_IMAGE_FIELD = 'my_image'; export const DEFAULT_LABEL_FIELD = 'label'; +export const DEFAULT_LLM_RESPONSE_FIELD = 'llm_response'; + +/** + * QUERY PRESETS + */ export const VECTOR_FIELD_PATTERN = `{{vector_field}}`; export const TEXT_FIELD_PATTERN = `{{text_field}}`; export const IMAGE_FIELD_PATTERN = `{{image_field}}`; diff --git a/common/interfaces.ts b/common/interfaces.ts index 4fe68f4b..19a4a0bd 100644 --- a/common/interfaces.ts +++ b/common/interfaces.ts @@ -489,6 +489,7 @@ export type QuickConfigureFields = { imageField?: string; labelField?: string; embeddingLength?: number; + llmResponseField?: string; }; /** diff --git a/public/configs/search_response_processors/collapse_processor.ts b/public/configs/search_response_processors/collapse_processor.ts new file mode 100644 index 00000000..be0decb9 --- /dev/null +++ b/public/configs/search_response_processors/collapse_processor.ts @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { PROCESSOR_TYPE } from '../../../common'; +import { Processor } from 
'../processor'; + +/** + * The collapse processor config. Used in search flows. + */ +export class CollapseProcessor extends Processor { + constructor() { + super(); + this.type = PROCESSOR_TYPE.COLLAPSE; + this.name = 'Collapse Processor'; + this.fields = [ + { + id: 'field', + type: 'string', + }, + ]; + this.optionalFields = [ + { + id: 'context_prefix', + type: 'string', + }, + { + id: 'tag', + type: 'string', + }, + { + id: 'description', + type: 'string', + }, + { + id: 'ignore_failure', + type: 'boolean', + value: false, + }, + ]; + } +} diff --git a/public/configs/search_response_processors/index.ts b/public/configs/search_response_processors/index.ts index 71a5ece5..2e5c5e40 100644 --- a/public/configs/search_response_processors/index.ts +++ b/public/configs/search_response_processors/index.ts @@ -7,3 +7,4 @@ export * from './ml_search_response_processor'; export * from './split_search_response_processor'; export * from './sort_search_response_processor'; export * from './normalization_processor'; +export * from './collapse_processor'; diff --git a/public/general_components/general-component-styles.scss b/public/general_components/general-component-styles.scss index 68f9cebf..c8c1f9d4 100644 --- a/public/general_components/general-component-styles.scss +++ b/public/general_components/general-component-styles.scss @@ -1,5 +1,5 @@ .multi-select-filter { &--width { - width: 200px; + width: 300px; } } diff --git a/public/pages/workflow_detail/tools/tools.tsx b/public/pages/workflow_detail/tools/tools.tsx index f861b93f..c11f2be0 100644 --- a/public/pages/workflow_detail/tools/tools.tsx +++ b/public/pages/workflow_detail/tools/tools.tsx @@ -42,7 +42,7 @@ const inputTabs = [ }, { id: TAB_ID.QUERY, - name: 'Run queries', + name: 'Run query', disabled: false, }, { diff --git a/public/pages/workflow_detail/workflow_detail.test.tsx b/public/pages/workflow_detail/workflow_detail.test.tsx index bbd3ebaf..65953214 100644 --- 
a/public/pages/workflow_detail/workflow_detail.test.tsx +++ b/public/pages/workflow_detail/workflow_detail.test.tsx @@ -87,7 +87,7 @@ describe('WorkflowDetail Page with create ingestion option', () => { expect(getByText('Visual')).toBeInTheDocument(); expect(getByText('JSON')).toBeInTheDocument(); expect(getByRole('tab', { name: 'Run ingestion' })).toBeInTheDocument(); - expect(getByRole('tab', { name: 'Run queries' })).toBeInTheDocument(); + expect(getByRole('tab', { name: 'Run query' })).toBeInTheDocument(); expect(getByRole('tab', { name: 'Errors' })).toBeInTheDocument(); expect(getByRole('tab', { name: 'Resources' })).toBeInTheDocument(); diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/input_transform_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/input_transform_modal.tsx index 2aac3a98..2da16980 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/input_transform_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/input_transform_modal.tsx @@ -249,7 +249,7 @@ export function InputTransformModal(props: InputTransformModalProps) { index: values.search.index.name, body: JSON.stringify({ ...JSON.parse(values.search.request as string), - search_pipeline: curSearchPipeline, + search_pipeline: curSearchPipeline || {}, }), }, dataSourceId, diff --git a/public/pages/workflow_detail/workflow_inputs/processors_list.tsx b/public/pages/workflow_detail/workflow_inputs/processors_list.tsx index bfbdfe19..be24fa93 100644 --- a/public/pages/workflow_detail/workflow_inputs/processors_list.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processors_list.tsx @@ -25,6 +25,7 @@ import { } from '../../../../common'; import { formikToUiConfig } from '../../../utils'; import { + CollapseProcessor, MLIngestProcessor, MLSearchRequestProcessor, MLSearchResponseProcessor, @@ -290,6 +291,13 @@ export function ProcessorsList(props: ProcessorsListProps) { ); }, }, + { + name: 
'Collapse Processor', + onClick: () => { + closePopover(); + addProcessor(new CollapseProcessor().toObj()); + }, + }, ], }, ]} diff --git a/public/pages/workflows/new_workflow/quick_configure_inputs.tsx b/public/pages/workflows/new_workflow/quick_configure_inputs.tsx index e3b61326..fd15c717 100644 --- a/public/pages/workflows/new_workflow/quick_configure_inputs.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_inputs.tsx @@ -19,6 +19,7 @@ import { COHERE_DIMENSIONS, DEFAULT_IMAGE_FIELD, DEFAULT_LABEL_FIELD, + DEFAULT_LLM_RESPONSE_FIELD, DEFAULT_TEXT_FIELD, DEFAULT_VECTOR_FIELD, MODEL_STATE, @@ -84,6 +85,13 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { }; break; } + case WORKFLOW_TYPE.RAG: { + defaultFieldValues = { + textField: DEFAULT_TEXT_FIELD, + llmResponseField: DEFAULT_LLM_RESPONSE_FIELD, + }; + break; + } case WORKFLOW_TYPE.CUSTOM: default: break; @@ -143,10 +151,7 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { return ( <> - {(props.workflowType === WORKFLOW_TYPE.SEMANTIC_SEARCH || - props.workflowType === WORKFLOW_TYPE.MULTIMODAL_SEARCH || - props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH || - props.workflowType === WORKFLOW_TYPE.SENTIMENT_ANALYSIS) && ( + {props.workflowType !== WORKFLOW_TYPE.CUSTOM ? ( <> @@ -209,6 +218,8 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { helpText={`The name of the text document field to be ${ props.workflowType === WORKFLOW_TYPE.SENTIMENT_ANALYSIS ? 'analyzed' + : props.workflowType === WORKFLOW_TYPE.RAG + ? 
'used as context to the large language model (LLM)' : 'embedded' }`} > @@ -297,9 +308,26 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { /> )} + {props.workflowType === WORKFLOW_TYPE.RAG && ( + + { + setFieldValues({ + ...fieldValues, + llmResponseField: e.target.value, + }); + }} + /> + + )} - )} + ) : undefined} ); } diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx index 62a3cf1d..316a13b4 100644 --- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx @@ -27,6 +27,7 @@ import { MapArrayFormValue, MapFormValue, ModelInterface, + PROCESSOR_TYPE, QuickConfigureFields, TEXT_FIELD_PATTERN, VECTOR, @@ -188,7 +189,7 @@ function injectQuickConfigureFields( case WORKFLOW_TYPE.MULTIMODAL_SEARCH: case WORKFLOW_TYPE.SENTIMENT_ANALYSIS: { if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) { - workflow.ui_metadata.config = updateIngestProcessorConfig( + workflow.ui_metadata.config = updateIngestProcessors( workflow.ui_metadata.config, quickConfigureFields, modelInterface, @@ -202,7 +203,7 @@ function injectQuickConfigureFields( (workflow.ui_metadata.config.search.request.value || '') as string, quickConfigureFields ); - workflow.ui_metadata.config = updateSearchRequestProcessorConfig( + workflow.ui_metadata.config = updateSearchRequestProcessors( workflow.ui_metadata.config, quickConfigureFields, modelInterface, @@ -211,6 +212,16 @@ function injectQuickConfigureFields( } break; } + case WORKFLOW_TYPE.RAG: { + if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) { + workflow.ui_metadata.config = updateSearchResponseProcessors( + workflow.ui_metadata.config, + quickConfigureFields, + modelInterface + ); + } + break; + } case WORKFLOW_TYPE.CUSTOM: case undefined: default: @@ -220,132 +231,197 @@ function injectQuickConfigureFields( return workflow; } -// 
prefill ML ingest processor config, if applicable -function updateIngestProcessorConfig( +// prefill ingest processor configs, if applicable +function updateIngestProcessors( config: WorkflowConfig, fields: QuickConfigureFields, modelInterface: ModelInterface | undefined, isVectorSearchUseCase: boolean ): WorkflowConfig { - config.ingest.enrich.processors[0].fields.forEach((field) => { - if (field.id === 'model' && fields.modelId) { - field.value = { id: fields.modelId }; - } - if (field.id === 'input_map') { - const inputMap = generateMapFromModelInputs(modelInterface); - if (fields.textField) { - if (inputMap.length > 0) { - inputMap[0] = { - ...inputMap[0], - value: fields.textField, - }; - } else { - inputMap.push({ - key: '', - value: fields.textField, - }); + config.ingest.enrich.processors.forEach((processor, idx) => { + // prefill ML inference + if (processor.type === PROCESSOR_TYPE.ML) { + config.ingest.enrich.processors[idx].fields.forEach((field) => { + if (field.id === 'model' && fields.modelId) { + field.value = { id: fields.modelId }; } - } - if (fields.imageField) { - if (inputMap.length > 1) { - inputMap[1] = { - ...inputMap[1], - value: fields.imageField, - }; - } else { - inputMap.push({ - key: '', - value: fields.imageField, - }); + if (field.id === 'input_map') { + const inputMap = generateMapFromModelInputs(modelInterface); + if (fields.textField) { + if (inputMap.length > 0) { + inputMap[0] = { + ...inputMap[0], + value: fields.textField, + }; + } else { + inputMap.push({ + key: '', + value: fields.textField, + }); + } + } + if (fields.imageField) { + if (inputMap.length > 1) { + inputMap[1] = { + ...inputMap[1], + value: fields.imageField, + }; + } else { + inputMap.push({ + key: '', + value: fields.imageField, + }); + } + } + field.value = [inputMap] as MapArrayFormValue; } - } - field.value = [inputMap] as MapArrayFormValue; - } - if (field.id === 'output_map') { - const outputMap = generateMapFromModelOutputs(modelInterface); - const 
defaultField = isVectorSearchUseCase - ? fields.vectorField - : fields.labelField; - if (defaultField) { - if (outputMap.length > 0) { - outputMap[0] = { - ...outputMap[0], - key: defaultField, - }; - } else { - outputMap.push({ key: defaultField, value: '' }); + if (field.id === 'output_map') { + const outputMap = generateMapFromModelOutputs(modelInterface); + const defaultField = isVectorSearchUseCase + ? fields.vectorField + : fields.labelField; + if (defaultField) { + if (outputMap.length > 0) { + outputMap[0] = { + ...outputMap[0], + key: defaultField, + }; + } else { + outputMap.push({ key: defaultField, value: '' }); + } + } + field.value = [outputMap] as MapArrayFormValue; } - } - field.value = [outputMap] as MapArrayFormValue; + }); } }); - return config; } -// prefill ML search request processor config, if applicable -// including populating placeholders in any pre-configured query_template -function updateSearchRequestProcessorConfig( +// prefill search request processor configs, if applicable +function updateSearchRequestProcessors( config: WorkflowConfig, fields: QuickConfigureFields, modelInterface: ModelInterface | undefined, isVectorSearchUseCase: boolean ): WorkflowConfig { - let defaultQueryValue = '' as string; - try { - defaultQueryValue = Object.keys( - flattie(JSON.parse(config.search?.request?.value as string)) - )[0]; - } catch {} - config.search.enrichRequest.processors[0].fields.forEach((field) => { - if (field.id === 'model' && fields.modelId) { - field.value = { id: fields.modelId }; + config.search.enrichRequest.processors.forEach((processor, idx) => { + // prefill ML inference + if (processor.type === PROCESSOR_TYPE.ML) { + let defaultQueryValue = '' as string; + try { + defaultQueryValue = Object.keys( + flattie(JSON.parse(config.search?.request?.value as string)) + )[0]; + } catch {} + config.search.enrichRequest.processors[idx].fields.forEach((field) => { + if (field.id === 'model' && fields.modelId) { + field.value = { id: 
fields.modelId }; + } + if (field.id === 'input_map') { + const inputMap = generateMapFromModelInputs(modelInterface); + if (inputMap.length > 0) { + inputMap[0] = { + ...inputMap[0], + value: defaultQueryValue, + }; + } else { + inputMap.push({ + key: '', + value: defaultQueryValue, + }); + } + field.value = [inputMap] as MapArrayFormValue; + } + if (field.id === 'output_map') { + const outputMap = generateMapFromModelOutputs(modelInterface); + const defaultKey = isVectorSearchUseCase ? VECTOR : defaultQueryValue; + if (outputMap.length > 0) { + outputMap[0] = { + ...outputMap[0], + key: defaultKey, + }; + } else { + outputMap.push({ + key: defaultKey, + value: '', + }); + } + field.value = [outputMap] as MapArrayFormValue; + } + }); + config.search.enrichRequest.processors[idx].optionalFields = config.search.enrichRequest.processors[idx].optionalFields?.map( + (optionalField) => { + let updatedOptionalField = optionalField; + if (optionalField.id === 'query_template') { + optionalField.value = injectPlaceholderValues( + (optionalField.value || '') as string, + fields + ); + } + return updatedOptionalField; + } + ); } }); return config; } + +// prefill response processor configs, if applicable +function updateSearchResponseProcessors( + config: WorkflowConfig, + fields: QuickConfigureFields, + modelInterface: ModelInterface | undefined +): WorkflowConfig { + config.search.enrichResponse.processors.forEach((processor, idx) => { + // prefill ML inference + if (processor.type === PROCESSOR_TYPE.ML) { + config.search.enrichResponse.processors[idx].fields.forEach((field) => { + if (field.id === 'model' && fields.modelId) { + field.value = { id: 
fields.modelId }; + } + if (field.id === 'input_map') { + const inputMap = generateMapFromModelInputs(modelInterface); + if (fields.textField) { + if (inputMap.length > 0) { + inputMap[0] = { + ...inputMap[0], + value: fields.textField, + }; + } else { + inputMap.push({ + key: '', + value: fields.textField, + }); + } + } + field.value = [inputMap] as MapArrayFormValue; + } + if (field.id === 'output_map') { + const outputMap = generateMapFromModelOutputs(modelInterface); + if (fields.llmResponseField) { + if (outputMap.length > 0) { + outputMap[0] = { + ...outputMap[0], + key: fields.llmResponseField, + }; + } else { + outputMap.push({ key: fields.llmResponseField, value: '' }); + } + } + field.value = [outputMap] as MapArrayFormValue; + } + }); } - if (field.id === 'output_map') { - const outputMap = generateMapFromModelOutputs(modelInterface); - const defaultKey = isVectorSearchUseCase ? VECTOR : defaultQueryValue; - if (outputMap.length > 0) { - outputMap[0] = { - ...outputMap[0], - key: defaultKey, - }; - } else { - outputMap.push({ - key: defaultKey, - value: '', - }); - } - field.value = [outputMap] as MapArrayFormValue; + // prefill collapse + if (processor.type === PROCESSOR_TYPE.COLLAPSE) { + config.search.enrichResponse.processors[idx].fields.forEach((field) => { + if (field.id === 'field' && fields.llmResponseField) { + field.value = fields.llmResponseField; + } + }); } }); - config.search.enrichRequest.processors[0].optionalFields = config.search.enrichRequest.processors[0].optionalFields?.map( - (optionalField) => { - let updatedOptionalField = optionalField; - if (optionalField.id === 'query_template') { - optionalField.value = injectPlaceholderValues( - (optionalField.value || '') as string, - fields - ); - } - return updatedOptionalField; - } - ); - return config; } diff --git a/public/pages/workflows/new_workflow/utils.ts b/public/pages/workflows/new_workflow/utils.ts index 841290b8..30b15854 100644 --- 
a/public/pages/workflows/new_workflow/utils.ts +++ b/public/pages/workflows/new_workflow/utils.ts @@ -5,8 +5,10 @@ import { snakeCase } from 'lodash'; import { + CollapseProcessor, MLIngestProcessor, MLSearchRequestProcessor, + MLSearchResponseProcessor, NormalizationProcessor, } from '../../../configs'; import { @@ -50,6 +52,10 @@ export function enrichPresetWorkflowWithUiMetadata( uiMetadata = fetchSentimentAnalysisMetadata(); break; } + case WORKFLOW_TYPE.RAG: { + uiMetadata = fetchRAGMetadata(); + break; + } default: { uiMetadata = fetchEmptyMetadata(); break; @@ -206,6 +212,18 @@ export function fetchSentimentAnalysisMetadata(): UIState { return baseState; } +export function fetchRAGMetadata(): UIState { + let baseState = fetchEmptyMetadata(); + baseState.type = WORKFLOW_TYPE.RAG; + baseState.config.ingest.index.name.value = generateId('my_index', 6); + baseState.config.search.request.value = customStringify(FETCH_ALL_QUERY); + baseState.config.search.enrichResponse.processors = [ + new MLSearchResponseProcessor().toObj(), + new CollapseProcessor().toObj(), + ]; + return baseState; +} + // Utility fn to process workflow names from their presentable/readable titles // on the UI, to a valid name format. // This leads to less friction if users decide to save the name later on. 
diff --git a/public/utils/config_to_template_utils.ts b/public/utils/config_to_template_utils.ts index 80e340d5..e0924461 100644 --- a/public/utils/config_to_template_utils.ts +++ b/public/utils/config_to_template_utils.ts @@ -315,6 +315,7 @@ export function processorConfigsToTemplateProcessors( } case PROCESSOR_TYPE.SPLIT: case PROCESSOR_TYPE.SORT: + case PROCESSOR_TYPE.COLLAPSE: default: { const formValues = processorConfigToFormik(processorConfig); let finalFormValues = {} as FormikValues; diff --git a/server/resources/templates/rag.json b/server/resources/templates/rag.json new file mode 100644 index 00000000..d0ad304c --- /dev/null +++ b/server/resources/templates/rag.json @@ -0,0 +1,14 @@ +{ + "name": "Retrieval-Augmented Generation", + "description": "A basic workflow containing the index and search pipeline configurations for performing basic retrieval-augmented generation", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.17.0", + "3.0.0" + ] + }, + "ui_metadata": { + "type": "Retrieval-augmented generation" + } +} \ No newline at end of file