diff --git a/common/constants.ts b/common/constants.ts index e28d84ca..6df2291f 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -86,6 +86,15 @@ export const OPENAI_DIMENSIONS = { [`text-embedding-ada-002`]: 1536, }; +// Amazon BedRock +export const BEDROCK_DIMENSIONS = { + [`amazon.titan-embed-text-v1`]: 1536, + [`amazon.titan-embed-text-v2`]: 1024, + [`amazon.titan-embed-image-v1`]: 1024, + [`cohere.embed-english-v3`]: 1024, // same as Cohere directly + [`cohere.embed-multilingual-v3`]: 1024, // same as Cohere directly +}; + /** * Various constants pertaining to Workflow configs */ @@ -135,24 +144,13 @@ export enum NODE_CATEGORY { * A base set of component classes / types. */ export enum COMPONENT_CLASS { - // Indexer-related classes - INDEXER = 'indexer', - KNN_INDEXER = 'knn_indexer', - // Retriever-related classes - RETRIEVER = 'retriever', - // Transformer-related classes + INDEX = 'index', + KNN_INDEX = 'knn_index', TRANSFORMER = 'transformer', - JSON_TO_JSON_TRANSFORMER = 'json_to_json_transformer', ML_TRANSFORMER = 'ml_transformer', - RESULTS_TRANSFORMER = 'results_transformer', - // Query-related classes - QUERY = 'query', - MATCH_QUERY = 'match_query', - NEURAL_QUERY = 'neural_query', - // Document-related classes + SEARCH_REQUEST = 'search_request', DOCUMENT = 'document', - // Results-related classes - RESULTS = 'results', + SEARCH_RESPONSE = 'search_response', } /** diff --git a/public/component_types/index.ts b/public/component_types/index.ts index ebdc369f..2aac8b4f 100644 --- a/public/component_types/index.ts +++ b/public/component_types/index.ts @@ -4,5 +4,5 @@ */ export * from './transformer'; -export * from './indexer'; +export * from './indices'; export * from './other'; diff --git a/public/component_types/indexer/base_indexer.ts b/public/component_types/indexer/base_indexer.ts deleted file mode 100644 index cfcc8b94..00000000 --- a/public/component_types/indexer/base_indexer.ts +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 
OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { COMPONENT_CLASS } from '../../../common'; -import { BaseComponent } from '../base_component'; - -/** - * A base indexer UI component - */ -export class BaseIndexer extends BaseComponent { - constructor() { - super(); - this.type = COMPONENT_CLASS.INDEXER; - this.label = 'Index'; - this.description = 'An OpenSearch index'; - this.inputs = [ - { - id: 'input', - label: 'Input', - acceptMultiple: false, - }, - ]; - this.outputs = [ - { - id: 'output', - label: 'Output', - }, - ]; - } -} diff --git a/public/component_types/indexer/knn_indexer.ts b/public/component_types/indexer/knn_indexer.ts deleted file mode 100644 index 498243dd..00000000 --- a/public/component_types/indexer/knn_indexer.ts +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { COMPONENT_CLASS } from '../../../common'; -import { BaseIndexer } from './base_indexer'; - -/** - * A specialized indexer component for vector/K-NN indices - */ -export class KnnIndexer extends BaseIndexer { - constructor() { - super(); - this.type = COMPONENT_CLASS.KNN_INDEXER; - this.label = 'K-NN Index'; - this.description = 'A specialized indexer for K-NN indices'; - } -} diff --git a/public/component_types/indices/base_index.ts b/public/component_types/indices/base_index.ts new file mode 100644 index 00000000..a014ad25 --- /dev/null +++ b/public/component_types/indices/base_index.ts @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { COMPONENT_CATEGORY, COMPONENT_CLASS } from '../../../common'; +import { BaseComponent } from '../base_component'; + +/** + * A basic index placeholder UI component. Input/output depends on ingest or search context. + * Does not have any functionality. 
+ */ +export class BaseIndex extends BaseComponent { + constructor(category: COMPONENT_CATEGORY) { + super(); + this.type = COMPONENT_CLASS.INDEX; + this.label = 'Index'; + this.description = 'An OpenSearch index'; + this.inputs = [ + { + id: + category === COMPONENT_CATEGORY.INGEST + ? 'document' + : 'search_request', + label: + category === COMPONENT_CATEGORY.INGEST + ? 'Document' + : 'Search Request', + acceptMultiple: false, + }, + ]; + this.outputs = + category === COMPONENT_CATEGORY.INGEST + ? [] + : [ + { + id: 'search_response', + label: 'Search Response', + }, + ]; + } +} diff --git a/public/component_types/indexer/index.ts b/public/component_types/indices/index.ts similarity index 57% rename from public/component_types/indexer/index.ts rename to public/component_types/indices/index.ts index a4e85f53..d375a3a6 100644 --- a/public/component_types/indexer/index.ts +++ b/public/component_types/indices/index.ts @@ -3,5 +3,5 @@ * SPDX-License-Identifier: Apache-2.0 */ -export * from './base_indexer'; -export * from './knn_indexer'; +export * from './base_index'; +export * from './knn_index'; diff --git a/public/component_types/indices/knn_index.ts b/public/component_types/indices/knn_index.ts new file mode 100644 index 00000000..2cde5035 --- /dev/null +++ b/public/component_types/indices/knn_index.ts @@ -0,0 +1,20 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { COMPONENT_CATEGORY, COMPONENT_CLASS } from '../../../common'; +import { BaseIndex } from './base_index'; + +/** + * A basic knn index placeholder UI component. Input/output depends on ingest or search context. + * Does not have any functionality. 
+ */ +export class KnnIndex extends BaseIndex { + constructor(category: COMPONENT_CATEGORY) { + super(category); + this.type = COMPONENT_CLASS.KNN_INDEX; + this.label = 'k-NN Index'; + this.description = 'A specialized k-NN index'; + } +} diff --git a/public/component_types/other/document.tsx b/public/component_types/other/document.tsx index 72af7689..fda645b7 100644 --- a/public/component_types/other/document.tsx +++ b/public/component_types/other/document.tsx @@ -7,7 +7,7 @@ import { COMPONENT_CLASS } from '../../../common'; import { BaseComponent } from '../base_component'; /** - * A basic Document placeholder UI component. + * A basic document placeholder UI component. * Does not have any functionality. */ export class Document extends BaseComponent { @@ -18,8 +18,8 @@ export class Document extends BaseComponent { this.description = 'A document to be ingested'; this.outputs = [ { - id: 'output', - label: 'Output', + id: 'document', + label: 'Document', }, ]; } diff --git a/public/component_types/other/index.ts b/public/component_types/other/index.ts index 85840547..30d85dbe 100644 --- a/public/component_types/other/index.ts +++ b/public/component_types/other/index.ts @@ -4,5 +4,5 @@ */ export * from './document'; -export * from './results'; -export * from './query'; +export * from './search_response'; +export * from './search_request'; diff --git a/public/component_types/other/query/index.ts b/public/component_types/other/query/index.ts deleted file mode 100644 index f6d76ea3..00000000 --- a/public/component_types/other/query/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -export * from './query'; -export * from './match_query'; -export * from './neural_query'; diff --git a/public/component_types/other/query/match_query.tsx b/public/component_types/other/query/match_query.tsx deleted file mode 100644 index 06cfe823..00000000 --- a/public/component_types/other/query/match_query.tsx 
+++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { COMPONENT_CLASS } from '../../../../common'; -import { Query } from './query'; - -/** - * A basic match query placeholder UI component. - * Does not have any functionality. - */ -export class MatchQuery extends Query { - constructor() { - super(); - this.type = COMPONENT_CLASS.MATCH_QUERY; - this.label = 'Match Query'; - this.description = 'An OpenSearch match query'; - } -} diff --git a/public/component_types/other/query/neural_query.tsx b/public/component_types/other/query/neural_query.tsx deleted file mode 100644 index 49136209..00000000 --- a/public/component_types/other/query/neural_query.tsx +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { COMPONENT_CLASS } from '../../../../common'; -import { Query } from './query'; - -/** - * A basic neural query placeholder UI component. - * Does not have any functionality. - */ -export class NeuralQuery extends Query { - constructor() { - super(); - this.type = COMPONENT_CLASS.NEURAL_QUERY; - this.label = 'Neural query'; - this.description = 'An OpenSearch neural query'; - } -} diff --git a/public/component_types/other/query/query.tsx b/public/component_types/other/query/query.tsx deleted file mode 100644 index d9a61950..00000000 --- a/public/component_types/other/query/query.tsx +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { COMPONENT_CLASS } from '../../../../common'; -import { BaseComponent } from '../../base_component'; - -/** - * A basic query placeholder UI component. - * Does not have any functionality. 
- */ -export class Query extends BaseComponent { - constructor() { - super(); - this.type = COMPONENT_CLASS.QUERY; - this.label = 'Query'; - this.description = 'An OpenSearch query'; - this.outputs = [ - { - id: 'output', - label: 'Output', - }, - ]; - } -} diff --git a/public/component_types/other/results.tsx b/public/component_types/other/results.tsx deleted file mode 100644 index f787c665..00000000 --- a/public/component_types/other/results.tsx +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -import { COMPONENT_CLASS } from '../../../common'; -import { BaseComponent } from '../base_component'; - -/** - * A basic Results placeholder UI component. - * Does not have any functionality. - */ -export class Results extends BaseComponent { - constructor() { - super(); - this.type = COMPONENT_CLASS.RESULTS; - this.label = 'Results'; - this.description = 'OpenSearch results'; - this.inputs = [{ id: 'input', label: 'Input', acceptMultiple: false }]; - } -} diff --git a/public/component_types/other/search_request.tsx b/public/component_types/other/search_request.tsx new file mode 100644 index 00000000..ce5cba37 --- /dev/null +++ b/public/component_types/other/search_request.tsx @@ -0,0 +1,26 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { COMPONENT_CLASS } from '../../../common'; +import { BaseComponent } from '../base_component'; + +/** + * A basic search request placeholder UI component. + * Does not have any functionality. 
+ */ +export class SearchRequest extends BaseComponent { + constructor() { + super(); + this.type = COMPONENT_CLASS.SEARCH_REQUEST; + this.label = 'Search Request'; + this.description = 'An OpenSearch search request'; + this.outputs = [ + { + id: 'search_request', + label: this.label, + }, + ]; + } +} diff --git a/public/component_types/other/search_response.tsx b/public/component_types/other/search_response.tsx new file mode 100644 index 00000000..6cb0c9c8 --- /dev/null +++ b/public/component_types/other/search_response.tsx @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +import { COMPONENT_CLASS } from '../../../common'; +import { BaseComponent } from '../base_component'; + +/** + * A basic search response placeholder UI component. + * Does not have any functionality. + */ +export class SearchResponse extends BaseComponent { + constructor() { + super(); + this.type = COMPONENT_CLASS.SEARCH_RESPONSE; + this.label = 'Search Response'; + this.description = 'OpenSearch search response'; + this.inputs = [ + { id: 'search_response', label: this.label, acceptMultiple: false }, + ]; + } +} diff --git a/public/component_types/transformer/base_transformer.ts b/public/component_types/transformer/base_transformer.ts index 94ee37e9..c57c15f1 100644 --- a/public/component_types/transformer/base_transformer.ts +++ b/public/component_types/transformer/base_transformer.ts @@ -3,29 +3,50 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { COMPONENT_CLASS } from '../../../common'; +import { COMPONENT_CLASS, PROCESSOR_CONTEXT } from '../../../common'; import { BaseComponent } from '../base_component'; /** - * A base transformer UI component + * A base transformer UI component representing ingest / search req / search resp processors. 
+ * Input/output descriptions depend on the processor context (ingest, search request, or search response) + */ export class BaseTransformer extends BaseComponent { - constructor(label: string, description: string) { + constructor(label: string, description: string, context: PROCESSOR_CONTEXT) { super(); this.type = COMPONENT_CLASS.TRANSFORMER; this.label = label; this.description = description; this.inputs = [ { - id: 'input', - label: 'Input', + id: + context === PROCESSOR_CONTEXT.INGEST + ? 'document' + : context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'search_request' + : 'search_response', + label: + context === PROCESSOR_CONTEXT.INGEST + ? 'Document' + : context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'Search Request' + : 'Search Response', acceptMultiple: false, }, ]; this.outputs = [ { - id: 'output', - label: 'Output', + id: + context === PROCESSOR_CONTEXT.INGEST + ? 'document' + : context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'search_request' + : 'search_response', + label: + context === PROCESSOR_CONTEXT.INGEST + ? 'Document' + : context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'Search Request' + : 'Search Response', }, ]; } diff --git a/public/component_types/transformer/ml_transformer.ts b/public/component_types/transformer/ml_transformer.ts index d3ff638f..cc54653e 100644 --- a/public/component_types/transformer/ml_transformer.ts +++ b/public/component_types/transformer/ml_transformer.ts @@ -3,19 +3,16 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { COMPONENT_CLASS } from '../../../common'; +import { COMPONENT_CLASS, PROCESSOR_CONTEXT } from '../../../common'; import { BaseTransformer } from './base_transformer'; /** - * A generic ML inference transformer. Can be used across ingest, search request, and search response. - * Under the hood, using the implemented ML inference processors.
- * Ref (ingest): https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/ + * A base ML transformer UI component representing ML inference processors. + * Input/output descriptions depend on the processor context (ingest, search request, or search response) */ export class MLTransformer extends BaseTransformer { - constructor() { - super(); + constructor(context: PROCESSOR_CONTEXT) { + super('ML Processor', 'A general ML processor', context); this.type = COMPONENT_CLASS.ML_TRANSFORMER; - this.label = 'ML Processor'; - this.description = 'A general ML processor'; } } diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/configure_prompt_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/configure_prompt_modal.tsx index 4850a6c8..d0c8c1a8 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/configure_prompt_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/configure_prompt_modal.tsx @@ -204,7 +204,7 @@ export function ConfigurePromptModal(props: ConfigurePromptModalProps) { setSchemaPopoverOpen(!schemaPopoverOpen) } > - View full input schema + View input schema } > diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx index 2aefddcb..4c901b0c 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx @@ -57,6 +57,7 @@ import { } from '../../../../../store'; import { getCore } from '../../../../../services'; import { + generateArrayTransform, getDataSourceId, parseModelInputs, parseModelInputsObj, @@ -134,13 +135,24 @@ export function InputTransformModal(props: InputTransformModalProps) { !isEmpty(JSON.parse(sourceInput)) &&
selectedTransformOption !== undefined ) { - let sampleSourceInput = {}; + let sampleSourceInput = {} as {} | []; try { sampleSourceInput = JSON.parse(sourceInput); - const output = generateTransform( - sampleSourceInput, - map[selectedTransformOption] - ); + const output = + // Edge case: users are collapsing input docs into a single input field when many-to-one is selected + // for input transforms on search response processors. + oneToOne === false && + props.context === PROCESSOR_CONTEXT.SEARCH_RESPONSE && + Array.isArray(sampleSourceInput) + ? generateArrayTransform( + sampleSourceInput as [], + map[selectedTransformOption] + ) + : generateTransform( + sampleSourceInput, + map[selectedTransformOption] + ); + setTransformedInput(customStringify(output)); } catch {} } else { @@ -471,7 +483,7 @@ export function InputTransformModal(props: InputTransformModalProps) { setPopoverOpen(!popoverOpen)} > - View model inputs + View input schema } > diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/processor_inputs.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/processor_inputs.tsx index 99d7dffd..d4dc2d98 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/processor_inputs.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/processor_inputs.tsx @@ -28,8 +28,6 @@ interface ProcessorInputsProps { context: PROCESSOR_CONTEXT; } -const PROCESSOR_INPUTS_SPACER_SIZE = 'm'; - // Component to dynamically render the processor inputs based on the processor types. // For most processors, we can use the standard/default ConfigFieldList components // for rendering the required and optional fields.
For more complex processors, we have @@ -51,7 +49,6 @@ export function ProcessorInputs(props: ProcessorInputsProps) { baseConfigPath={props.baseConfigPath} context={props.context} /> - ); break; @@ -65,7 +62,6 @@ export function ProcessorInputs(props: ProcessorInputsProps) { baseConfigPath={props.baseConfigPath} context={props.context} /> - ); break; @@ -79,7 +75,6 @@ export function ProcessorInputs(props: ProcessorInputsProps) { baseConfigPath={props.baseConfigPath} context={props.context} /> - ); break; diff --git a/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx b/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx index c58641fc..4aed28cb 100644 --- a/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx +++ b/public/pages/workflow_detail/workflow_inputs/workflow_inputs.tsx @@ -555,7 +555,7 @@ export function WorkflowInputs(props: WorkflowInputsProps) { } } else { getCore().notifications.toasts.addDanger( - 'No valid document provided. Ensure it is a valid JSON array.' + 'No valid document(s) provided. Ensure it is a valid JSON array.' 
); } } catch (error) { diff --git a/public/pages/workflows/new_workflow/new_workflow.test.tsx b/public/pages/workflows/new_workflow/new_workflow.test.tsx index 270dc0cc..15a6b172 100644 --- a/public/pages/workflows/new_workflow/new_workflow.test.tsx +++ b/public/pages/workflows/new_workflow/new_workflow.test.tsx @@ -14,8 +14,6 @@ import * as ReactReduxHooks from '../../../store/store'; import '@testing-library/jest-dom'; import { loadPresetWorkflowTemplates } from '../../../../test/utils'; import { INITIAL_ML_STATE } from '../../../../public/store'; -import { WORKFLOW_TYPE } from '../../../../common/constants'; -import { capitalizeEachWord } from '../../../../test/utils'; jest.mock('../../../services', () => { const { mockCoreServices } = require('../../../../test'); @@ -54,13 +52,13 @@ describe('NewWorkflow', () => { jest.spyOn(ReactReduxHooks, 'useAppDispatch').mockReturnValue(mockDispatch); }); - test('renders the preset workflow templates', () => { + test('renders the preset workflow names & descriptions', () => { + const presetWorkflows = loadPresetWorkflowTemplates(); const { getByPlaceholderText, getAllByText } = renderWithRouter(); expect(getByPlaceholderText('Search')).toBeInTheDocument(); - Object.values(WORKFLOW_TYPE).forEach((type) => { - if (type !== WORKFLOW_TYPE.UNKNOWN) { - expect(getAllByText(capitalizeEachWord(type))).toHaveLength(1); - } + presetWorkflows.forEach((workflow) => { + expect(getAllByText(workflow.name)).toHaveLength(1); + expect(getAllByText(workflow.description)).toHaveLength(1); }); }); diff --git a/public/pages/workflows/new_workflow/quick_configure_inputs.tsx b/public/pages/workflows/new_workflow/quick_configure_inputs.tsx index fd15c717..d83cd2f2 100644 --- a/public/pages/workflows/new_workflow/quick_configure_inputs.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_inputs.tsx @@ -16,6 +16,7 @@ import { EuiCompressedFieldNumber, } from '@elastic/eui'; import { + BEDROCK_DIMENSIONS, COHERE_DIMENSIONS, 
DEFAULT_IMAGE_FIELD, DEFAULT_LABEL_FIELD, @@ -130,9 +131,9 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { // @ts-ignore COHERE_DIMENSIONS[connector.parameters?.model] || // @ts-ignore - (OPENAI_DIMENSIONS[connector.parameters?.model] as - | number - | undefined); + OPENAI_DIMENSIONS[connector.parameters?.model] || + // @ts-ignore + BEDROCK_DIMENSIONS[connector.parameters?.model]; if (dimensions !== undefined) { setFieldValues({ ...fieldValues, @@ -161,6 +162,7 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { > ({ @@ -213,6 +216,7 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { { setFieldValues({ @@ -237,11 +242,13 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { {props.workflowType === WORKFLOW_TYPE.MULTIMODAL_SEARCH && ( <> { setFieldValues({ @@ -259,11 +266,13 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH) && ( <> { setFieldValues({ @@ -275,11 +284,13 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { { setFieldValues({ @@ -293,11 +304,13 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { )} {props.workflowType === WORKFLOW_TYPE.SENTIMENT_ANALYSIS && ( { setFieldValues({ @@ -310,11 +323,13 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) { )} {props.workflowType === WORKFLOW_TYPE.RAG && ( { setFieldValues({ diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx index 07255dbc..56db5852 100644 --- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx @@ -105,11 +105,13 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { { diff --git a/public/utils/config_to_workspace_utils.ts b/public/utils/config_to_workspace_utils.ts 
index 1aaa6ea2..a24a51e0 100644 --- a/public/utils/config_to_workspace_utils.ts +++ b/public/utils/config_to_workspace_utils.ts @@ -18,14 +18,15 @@ import { PROCESSOR_TYPE, IComponent, IComponentData, + PROCESSOR_CONTEXT, } from '../../common'; import { Document, - BaseIndexer, MLTransformer, BaseTransformer, - Query, - Results, + SearchResponse, + SearchRequest, + BaseIndex, } from '../component_types'; import { generateId } from './utils'; @@ -111,14 +112,17 @@ function ingestConfigToWorkspaceFlow( parentNode: parentNode.id, extent: 'parent', } as ReactFlowComponent; - const indexNodeId = generateId(COMPONENT_CLASS.KNN_INDEXER); + const indexNodeId = generateId(COMPONENT_CLASS.INDEX); const indexNode = { id: indexNodeId, position: { - x: parentNode.style.width - (NODE_WIDTH + NODE_SPACING), + x: parentNode?.style?.width - (NODE_WIDTH + NODE_SPACING), y: NODE_HEIGHT_Y, }, - data: initComponentData(new BaseIndexer().toObj(), indexNodeId), + data: initComponentData( + new BaseIndex(COMPONENT_CATEGORY.INGEST).toObj(), + indexNodeId + ), type: NODE_CATEGORY.CUSTOM, parentNode: parentNode.id, extent: 'parent', @@ -128,6 +132,7 @@ function ingestConfigToWorkspaceFlow( // Get nodes/edges from the sub-configurations const enrichWorkspaceFlow = processorsConfigToWorkspaceFlow( ingestConfig.enrich, + PROCESSOR_CONTEXT.INGEST, parentNode.id, NODE_WIDTH + NODE_SPACING * 2 // node padding + (width of doc node) + node padding ); @@ -202,55 +207,60 @@ function searchConfigToWorkspaceFlow( // Get nodes/edges from the processor sub-configurations const enrichRequestWorkspaceFlow = processorsConfigToWorkspaceFlow( searchConfig.enrichRequest, + PROCESSOR_CONTEXT.SEARCH_REQUEST, parentNode.id, - NODE_WIDTH + NODE_SPACING * 2 // node padding + (width of query node) + node padding + NODE_WIDTH + NODE_SPACING * 2 // node padding + (width of searchRequest node) + node padding ); const enrichResponseWorkspaceFlow = processorsConfigToWorkspaceFlow( searchConfig.enrichResponse, + 
PROCESSOR_CONTEXT.SEARCH_RESPONSE, parentNode.id, NODE_SPACING + (NODE_WIDTH + NODE_SPACING) * - (enrichRequestWorkspaceFlow.nodes.length + 2) // node padding + (width + padding of query node, any request processor nodes, and index node) + (enrichRequestWorkspaceFlow.nodes.length + 2) // node padding + (width + padding of searchRequest node, any request processor nodes, and index node) ); - // By default, always include a query node, an index node, and a results node. - const queryNodeId = generateId(COMPONENT_CLASS.QUERY); - const queryNode = { - id: queryNodeId, + // By default, always include a search request node, an index node, and a search response node. + const searchRequestNodeId = generateId(COMPONENT_CLASS.SEARCH_REQUEST); + const searchRequestNode = { + id: searchRequestNodeId, position: { x: 100, y: 70 }, - data: initComponentData(new Query().toObj(), queryNodeId), + data: initComponentData(new SearchRequest().toObj(), searchRequestNodeId), type: NODE_CATEGORY.CUSTOM, parentNode: parentNode.id, extent: 'parent', } as ReactFlowComponent; - const indexNodeId = generateId(COMPONENT_CLASS.KNN_INDEXER); + const indexNodeId = generateId(COMPONENT_CLASS.INDEX); const indexNode = { id: indexNodeId, position: { x: - parentNode.style.width - + parentNode?.style?.width - (NODE_WIDTH + NODE_SPACING) * (enrichResponseWorkspaceFlow.nodes.length + 2), y: NODE_HEIGHT_Y, }, - data: initComponentData(new BaseIndexer().toObj(), indexNodeId), + data: initComponentData( + new BaseIndex(COMPONENT_CATEGORY.SEARCH).toObj(), + indexNodeId + ), type: NODE_CATEGORY.CUSTOM, parentNode: parentNode.id, extent: 'parent', } as ReactFlowComponent; - const resultsNodeId = generateId(COMPONENT_CLASS.RESULTS); - const resultsNode = { - id: resultsNodeId, + const searchResponseNodeId = generateId(COMPONENT_CLASS.SEARCH_RESPONSE); + const searchResponseNode = { + id: searchResponseNodeId, position: { - x: parentNode.style.width - (NODE_WIDTH + NODE_SPACING), + x: parentNode?.style?.width - 
(NODE_WIDTH + NODE_SPACING), y: NODE_HEIGHT_Y, }, - data: initComponentData(new Results().toObj(), resultsNodeId), + data: initComponentData(new SearchResponse().toObj(), searchResponseNodeId), type: NODE_CATEGORY.CUSTOM, parentNode: parentNode.id, extent: 'parent', } as ReactFlowComponent; - nodes.push(queryNode, indexNode, resultsNode); + nodes.push(searchRequestNode, indexNode, searchResponseNode); nodes.push( ...enrichRequestWorkspaceFlow.nodes, @@ -264,11 +274,11 @@ function searchConfigToWorkspaceFlow( // Link up the set of localized nodes/edges per sub-workflow edges.push( ...getSearchEdges( - queryNode, + searchRequestNode, enrichRequestWorkspaceFlow, indexNode, enrichResponseWorkspaceFlow, - resultsNode + searchResponseNode ) ); @@ -282,6 +292,7 @@ function searchConfigToWorkspaceFlow( // based on the list of processors in a config function processorsConfigToWorkspaceFlow( processorsConfig: ProcessorsConfig, + context: PROCESSOR_CONTEXT, parentNodeId: string, xPosition: number ): WorkspaceFlowState { @@ -292,43 +303,58 @@ function processorsConfigToWorkspaceFlow( processorsConfig.processors.forEach((processorConfig) => { let transformer = {} as BaseTransformer; - let transformerNodeId = ''; switch (processorConfig.type) { case PROCESSOR_TYPE.ML: { - transformer = new MLTransformer(); - transformerNodeId = generateId(COMPONENT_CLASS.ML_TRANSFORMER); + transformer = new MLTransformer(context); break; } case PROCESSOR_TYPE.SPLIT: { transformer = new BaseTransformer( processorConfig.name, - 'A processor to split a string field into an array of substrings' + 'Split a string field into an array of substrings', + context ); - transformerNodeId = generateId(COMPONENT_CLASS.TRANSFORMER); break; } case PROCESSOR_TYPE.SORT: { transformer = new BaseTransformer( processorConfig.name, - 'A processor to sort an array of items in either ascending or descending order' + 'Sort an array of items in either ascending or descending order', + context ); - transformerNodeId = 
generateId(COMPONENT_CLASS.TRANSFORMER); break; } case PROCESSOR_TYPE.TEXT_CHUNKING: { transformer = new BaseTransformer( processorConfig.name, - 'A processor to split long documents into shorter passages' + 'Split long documents into shorter passages', + context ); - transformerNodeId = generateId(COMPONENT_CLASS.TRANSFORMER); + break; + } + case PROCESSOR_TYPE.NORMALIZATION: { + transformer = new BaseTransformer( + processorConfig.name, + 'Normalize and combine document scores from different query clauses', + context + ); + break; + } + case PROCESSOR_TYPE.COLLAPSE: { + transformer = new BaseTransformer( + processorConfig.name, + 'Discard hits with duplicate values', + context + ); + break; } default: { - transformer = new BaseTransformer(processorConfig.name, ''); - transformerNodeId = generateId(COMPONENT_CLASS.TRANSFORMER); + transformer = new BaseTransformer(processorConfig.name, '', context); break; } } + const transformerNodeId = generateId(transformer.type); nodes.push({ id: transformerNodeId, position: { x: xPosition, y: NODE_HEIGHT_Y }, @@ -355,11 +381,11 @@ function processorsConfigToWorkspaceFlow( // Given the set of localized flows per sub-configuration, generate the global search-level edges. // This takes the assumption the flow is linear, and all sub-configuration flows are fully connected. 
function getSearchEdges( - queryNode: ReactFlowComponent, + searchRequestNode: ReactFlowComponent, enrichRequestFlow: WorkspaceFlowState, indexNode: ReactFlowComponent, enrichResponseFlow: WorkspaceFlowState, - resultsNode: ReactFlowComponent + searchResponseNode: ReactFlowComponent ): ReactFlowEdge[] { const startAndEndNodesEnrichRequest = getStartAndEndNodes(enrichRequestFlow); const startAndEndNodesEnrichResponse = getStartAndEndNodes( @@ -375,7 +401,7 @@ function getSearchEdges( ...([ generateReactFlowEdge( requestToEnrichRequestEdgeId, - queryNode.id, + searchRequestNode.id, startAndEndNodesEnrichRequest.startNode.id ), @@ -389,14 +415,18 @@ function getSearchEdges( } else { const requestToIndexEdgeId = generateId('edge'); edges.push( - generateReactFlowEdge(requestToIndexEdgeId, queryNode.id, indexNode.id) + generateReactFlowEdge( + requestToIndexEdgeId, + searchRequestNode.id, + indexNode.id + ) ); } // Users may omit search response processors altogether. Need to handle cases separately. 
if (startAndEndNodesEnrichResponse !== undefined) { const indexToEnrichResponseEdgeId = generateId('edge'); - const enrichResponseToResultsEdgeId = generateId('edge'); + const enrichResponseToSearchResponseEdgeId = generateId('edge'); edges.push( ...([ @@ -406,16 +436,20 @@ function getSearchEdges( startAndEndNodesEnrichResponse.startNode.id ), generateReactFlowEdge( - enrichResponseToResultsEdgeId, + enrichResponseToSearchResponseEdgeId, startAndEndNodesEnrichResponse.endNode.id, - resultsNode.id + searchResponseNode.id ), ] as ReactFlowEdge[]) ); } else { - const indexToResultsEdgeId = generateId('edge'); + const indexToSearchResponseEdgeId = generateId('edge'); edges.push( - generateReactFlowEdge(indexToResultsEdgeId, indexNode.id, resultsNode.id) + generateReactFlowEdge( + indexToSearchResponseEdgeId, + indexNode.id, + searchResponseNode.id + ) ); } diff --git a/public/utils/utils.ts b/public/utils/utils.ts index 2b607afb..f03d5c61 100644 --- a/public/utils/utils.ts +++ b/public/utils/utils.ts @@ -178,17 +178,35 @@ export function unwrapTransformedDocs( // ML inference processors will use standard dot notation or JSONPath depending on the input. // We follow the same logic here to generate consistent results. -// Collapse the values depending on if the input is an array or not. -export function generateTransform(input: {} | [], map: MapFormValue): {} | [] { +export function generateTransform(input: {} | [], map: MapFormValue): {} { let output = {}; map.forEach((mapEntry) => { - const path = mapEntry.value; try { - const transformedResult = Array.isArray(input) - ? 
input.map((inputEntry) => - getTransformedResult(mapEntry, inputEntry, path) - ) - : getTransformedResult(mapEntry, input, path); + const transformedResult = getTransformedResult( + mapEntry, + input, + mapEntry.value + ); + output = { + ...output, + [mapEntry.key]: transformedResult || '', + }; + } catch (e: any) {} + }); + return output; +} + +// Similar to generateTransform, but collapse the values of the input array into +// a single field value in the transformed output. +// A specialty scenario for when configuring input on search response processors, one-to-one is false, +// and the input is an array. +export function generateArrayTransform(input: [], map: MapFormValue): {}[] { + let output = [] as {}[]; + map.forEach((mapEntry) => { + try { + const transformedResult = input.map((inputEntry) => + getTransformedResult(mapEntry, inputEntry, mapEntry.value) + ); output = { ...output, [mapEntry.key]: transformedResult || '', @@ -203,7 +221,13 @@ function getTransformedResult( input: {}, path: string ): any { - return mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) + // Edge case: if the path is ".", it implies returning + // the entire value. This may happen if full_response_path=false + // and the input is the entire result with nothing else to parse out. + // get() does not cover this case, so we override manually. + return path === '.' + ? input + : mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) ? 
// JSONPath transform jsonpath.query(input, path) : // Standard dot notation diff --git a/server/resources/templates/rag.json b/server/resources/templates/rag.json index d0ad304c..bba2c1cc 100644 --- a/server/resources/templates/rag.json +++ b/server/resources/templates/rag.json @@ -1,6 +1,6 @@ { - "name": "Retrieval-Augmented Generation", - "description": "A basic workflow containing the index and search pipeline configurations for performing basic retrieval-augmented generation", + "name": "Retrieval-Augmented Generation (RAG)", + "description": "A basic workflow containing the index and search pipeline configurations for performing basic retrieval-augmented generation (RAG)", "version": { "template": "1.0.0", "compatibility": [ diff --git a/test/utils.ts b/test/utils.ts index 03d067de..45a3c2df 100644 --- a/test/utils.ts +++ b/test/utils.ts @@ -93,10 +93,6 @@ export const loadPresetWorkflowTemplates = () => JSON.parse(fs.readFileSync(path.join(templatesDir, file), 'utf8')) ); -export function capitalizeEachWord(input: string): string { - return input.replace(/\b\w/g, (match) => match.toUpperCase()); -} - export const resizeObserverMock = jest.fn().mockImplementation(() => ({ observe: jest.fn(), unobserve: jest.fn(),