diff --git a/common/constants.ts b/common/constants.ts index 07e7ad5e..8e066117 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -464,6 +464,10 @@ export enum PROCESSOR_CONTEXT { SEARCH_REQUEST = 'search_request', SEARCH_RESPONSE = 'search_response', } +export enum TRANSFORM_CONTEXT { + INPUT = 'input', + OUTPUT = 'output', +} export const START_FROM_SCRATCH_WORKFLOW_NAME = 'Start From Scratch'; export const DEFAULT_NEW_WORKFLOW_NAME = 'new_workflow'; export const DEFAULT_NEW_WORKFLOW_DESCRIPTION = 'My new workflow'; diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs.tsx index e12e4dd9..2e9444e3 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/ml_processor_inputs.tsx @@ -24,7 +24,6 @@ import { IConfigField, PROCESSOR_CONTEXT, WorkflowConfig, - JSONPATH_ROOT_SELECTOR, WorkflowFormValues, ModelInterface, IndexMappings, @@ -45,6 +44,7 @@ import { getDataSourceId, parseModelInputs, parseModelOutputs, + sanitizeJSONPath, } from '../../../../utils'; import { ConfigFieldList } from '../config_field_list'; import { OverrideQueryModal } from './modals/override_query_modal'; @@ -186,7 +186,13 @@ export function MLProcessorInputs(props: MLProcessorInputsProps) { setDocFields( docObjKeys.map((key) => { return { - label: key, + label: + // ingest inputs can handle dot notation, and hence don't need + // sanitizing to handle JSONPath edge cases. The other contexts + // only support JSONPath, and hence need some post-processing/sanitizing. + props.context === PROCESSOR_CONTEXT.INGEST + ? 
key + : sanitizeJSONPath(key), }; }) ); @@ -202,7 +208,13 @@ export function MLProcessorInputs(props: MLProcessorInputsProps) { setQueryFields( queryObjKeys.map((key) => { return { - label: key, + label: + // ingest inputs can handle dot notation, and hence don't need + // sanitizing to handle JSONPath edge cases. The other contexts + // only support JSONPath, and hence need some post-processing/sanitizing. + props.context === PROCESSOR_CONTEXT.INGEST + ? key + : sanitizeJSONPath(key), }; }) ); @@ -391,7 +403,15 @@ export function MLProcessorInputs(props: MLProcessorInputsProps) { ? 'Specify a query field' : 'Define a document field' } - valueHelpText={`Specify a document field or define JSONPath to transform the document to map to a model input field.`} + valueHelpText={`Specify a ${ + props.context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'query' + : 'document' + } field or define JSONPath to transform the ${ + props.context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'query' + : 'document' + } to map to a model input field.`} valueOptions={ props.context === PROCESSOR_CONTEXT.INGEST ? docFields diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx index 24218ac8..50fec356 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx @@ -42,6 +42,7 @@ import { PROCESSOR_CONTEXT, SearchHit, SimulateIngestPipelineResponse, + TRANSFORM_CONTEXT, WorkflowConfig, WorkflowFormValues, customStringify, @@ -185,11 +186,15 @@ export function InputTransformModal(props: InputTransformModalProps) { Array.isArray(sampleSourceInput) ? 
generateArrayTransform( sampleSourceInput as [], - tempInputMap[selectedTransformOption] + tempInputMap[selectedTransformOption], + props.context, + TRANSFORM_CONTEXT.INPUT ) : generateTransform( sampleSourceInput, - tempInputMap[selectedTransformOption] + tempInputMap[selectedTransformOption], + props.context, + TRANSFORM_CONTEXT.INPUT ); setTransformedInput(customStringify(output)); @@ -303,7 +308,15 @@ export function InputTransformModal(props: InputTransformModalProps) { ? 'Specify a query field' : 'Define a document field' } - valueHelpText={`Specify a document field or define JSONPath to transform the document to map to a model input field.`} + valueHelpText={`Specify a ${ + props.context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'query' + : 'document' + } field or define JSONPath to transform the ${ + props.context === PROCESSOR_CONTEXT.SEARCH_REQUEST + ? 'query' + : 'document' + } to map to a model input field.`} valueOptions={props.valueOptions} // If the map we are adding is the first one, populate the selected option to index 0 onMapAdd={(curArray) => { diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx index c7e94475..94e2e2d0 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx @@ -42,6 +42,7 @@ import { SearchHit, SearchPipelineConfig, SimulateIngestPipelineResponse, + TRANSFORM_CONTEXT, WorkflowConfig, WorkflowFormValues, customStringify, @@ -157,7 +158,9 @@ export function OutputTransformModal(props: OutputTransformModalProps) { sampleSourceOutput = JSON.parse(sourceOutput); const output = generateTransform( sampleSourceOutput, - reverseKeysAndValues(tempOutputMap[selectedTransformOption]) + 
reverseKeysAndValues(tempOutputMap[selectedTransformOption]), + props.context, + TRANSFORM_CONTEXT.OUTPUT ); setTransformedOutput(customStringify(output)); } catch {} diff --git a/public/utils/config_to_template_utils.ts b/public/utils/config_to_template_utils.ts index 3d073cb2..5b9908e4 100644 --- a/public/utils/config_to_template_utils.ts +++ b/public/utils/config_to_template_utils.ts @@ -35,6 +35,7 @@ import { SearchPipelineConfig, } from '../../common'; import { processorConfigToFormik } from './config_to_form_utils'; +import { sanitizeJSONPath } from './utils'; /* **************** Config -> template utils ********************** @@ -451,11 +452,11 @@ function mergeMapIntoSingleObj( curMap = reverse ? { ...curMap, - [mapEntry.value]: mapEntry.key, + [sanitizeJSONPath(mapEntry.value)]: sanitizeJSONPath(mapEntry.key), } : { ...curMap, - [mapEntry.key]: mapEntry.value, + [sanitizeJSONPath(mapEntry.key)]: sanitizeJSONPath(mapEntry.value), }; }); return curMap; diff --git a/public/utils/utils.ts b/public/utils/utils.ts index 7dc9db9b..7d0b6fce 100644 --- a/public/utils/utils.ts +++ b/public/utils/utils.ts @@ -15,8 +15,10 @@ import { ModelInterface, ModelOutput, ModelOutputFormField, + PROCESSOR_CONTEXT, SimulateIngestPipelineDoc, SimulateIngestPipelineResponse, + TRANSFORM_CONTEXT, WORKFLOW_RESOURCE_TYPE, WORKFLOW_STEP_TYPE, Workflow, @@ -182,14 +184,21 @@ export function unwrapTransformedDocs( // ML inference processors will use standard dot notation or JSONPath depending on the input. // We follow the same logic here to generate consistent results. 
-export function generateTransform(input: {} | [], map: MapFormValue): {} { +export function generateTransform( + input: {} | [], + map: MapFormValue, + context: PROCESSOR_CONTEXT, + transformContext: TRANSFORM_CONTEXT +): {} { let output = {}; map.forEach((mapEntry) => { try { const transformedResult = getTransformedResult( mapEntry, input, - mapEntry.value + mapEntry.value, + context, + transformContext ); output = { ...output, @@ -204,12 +213,23 @@ export function generateTransform(input: {} | [], map: MapFormValue): {} { // a single field value in the transformed output. // A specialty scenario for when configuring input on search response processors, one-to-one is false, // and the input is an array. -export function generateArrayTransform(input: [], map: MapFormValue): {}[] { +export function generateArrayTransform( + input: [], + map: MapFormValue, + context: PROCESSOR_CONTEXT, + transformContext: TRANSFORM_CONTEXT +): {}[] { let output = [] as {}[]; map.forEach((mapEntry) => { try { const transformedResult = input.map((inputEntry) => - getTransformedResult(mapEntry, inputEntry, mapEntry.value) + getTransformedResult( + mapEntry, + inputEntry, + mapEntry.value, + context, + transformContext + ) ); output = { ...output, @@ -223,19 +243,62 @@ export function generateArrayTransform(input: [], map: MapFormValue): {}[] { function getTransformedResult( mapEntry: MapEntry, input: {}, - path: string + path: string, + context: PROCESSOR_CONTEXT, + transformContext: TRANSFORM_CONTEXT ): any { - // Edge case: if the path is ".", it implies returning - // the entire value. This may happen if full_response_path=false - // and the input is the entire result with nothing else to parse out. - // get() does not cover this case, so we override manually. - return path === '.' - ? input - : mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) - ? 
// JSONPath transform - jsonpath.query(input, path) - : // Standard dot notation - get(input, path); + // Regular dot notation can only be executed if 1/ the JSONPath selector is not explicitly defined, + // and 2/ it is in the context of ingest, and 3/ it is transforming the input (the source document). + // For all other scenarios, it can only be JSONPath, due to backend parsing limitations. + if ( + !mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) && + context === PROCESSOR_CONTEXT.INGEST && + transformContext === TRANSFORM_CONTEXT.INPUT + ) { + // sub-edge case: if the path is ".", it implies returning + // the entire value. This may happen if full_response_path=false + // and the input is the entire result with nothing else to parse out. + // get() does not cover this case, so we override manually. + if (path === '.') { + return input; + } else { + return get(input, path); + } + } else { + // The backend sets a JSONPath setting ALWAYS_RETURN_LIST=false, which + // dynamically returns a list or single value, based on whether + // the path is definite or not. We try to mimic that with a + // custom fn isIndefiniteJsonPath(), since this setting, nor + // knowing if the path is definite or indefinite, is not exposed + // by any known jsonpath JS-based / NPM libraries. + // if found to be definite, we remove the outermost array, which + // will always be returned by default when running query(). + const isIndefinite = isIndefiniteJsonPath(path); + const res = jsonpath.query(input, path); + if (isIndefinite) { + return res; + } else { + return res[0]; + } + } +} + +// Indefinite/definite path defns: +// https://github.com/json-path/JsonPath?tab=readme-ov-file#what-is-returned-when +// Note this may not cover every use case, as the true definition requires low-level +// branch navigation of the path nodes, which is not exposed by this npm library. +// Hence, we do our best to cover the majority of use cases and common patterns. 
/**
 * Best-effort check for whether a JSONPath expression is "indefinite", i.e.
 * may resolve to more than one value.
 *
 * This is a textual heuristic only: it pattern-matches the path string and
 * does not parse it, so unusual paths (e.g. quoted keys that themselves
 * contain these characters) may be misclassified.
 *
 * @param path - the JSONPath expression to inspect
 * @returns true if any indefinite construct is found in the path
 */
function isIndefiniteJsonPath(path: string): boolean {
  // The regex is an OR over the constructs that make a path indefinite:
  // 1. consecutive '.'s, indicating a deep scan
  // 2. a '.*' wildcard in dot notation (same operator as the '[*]' form below)
  // 3. ?(), indicating a filter expression
  // 4. two or more comma-separated array indices, whitespace tolerated
  // 5. an array slice, i.e. brackets containing ':'
  // 6. a '[*]' wildcard in bracket notation
  // No 'g' flag: test() on a global regex is stateful (lastIndex), which is
  // never wanted for a plain yes/no check.
  const indefiniteRegEx = /\.{2}|\.\*|\?\(.*\)|\[\s*-?\d+(?:\s*,\s*-?\d+)+\s*\]|\[.*:.*\]|\[\*\]/;
  return indefiniteRegEx.test(path);
}

/**
 * Sanitize the nested keys of a dot-delimited JSONPath definition so that the
 * same path string works consistently on the frontend & backend. There are
 * discrepancies between the two JSONPath implementations, such that some
 * paths succeed on one side and fail on the other.
 *
 * The function is idempotent: running it on an already-sanitized path returns
 * the path unchanged, so it is safe to apply at more than one layer.
 *
 * Limitations: the path is split on every '.', so quoted keys that contain a
 * literal '.' are not supported. The first segment (typically the root
 * selector) is always passed through untouched.
 *
 * @param path - the JSONPath (or dot-notation path) to sanitize
 * @returns the sanitized path
 */
export function sanitizeJSONPath(path: string): string {
  return path.split('.').reduce((prevValue, curValue) => {
    // A segment may already carry bracket notation (e.g. `a["b-c"]` or
    // `a[0]`). Only the leading key is eligible for rewriting; the bracket
    // suffix is preserved verbatim so sanitized paths are never re-wrapped.
    const bracketIdx = curValue.indexOf('[');
    const key = bracketIdx === -1 ? curValue : curValue.slice(0, bracketIdx);
    const suffix = bracketIdx === -1 ? '' : curValue.slice(bracketIdx);

    // Case 1: accessing array via dot notation. Fails on the backend.
    // The whole key must be an integer; keys that merely start with digits
    // (e.g. `1abc`, `3-d`) are regular keys, not indices.
    if (/^-?\d+$/.test(key)) {
      return `${prevValue}[${key}]${suffix}`;
    }
    // Case 2: accessing key with a dash via dot notation. Fails on the frontend.
    if (key.includes('-')) {
      return `${prevValue}["${key}"]${suffix}`;
    }
    return `${prevValue}.${curValue}`;
  });
}