From 01c4f355df6003a39281ca5ea68248ba0ca7326d Mon Sep 17 00:00:00 2001 From: Tyler Ohlsen Date: Fri, 15 Nov 2024 11:29:52 -0800 Subject: [PATCH] Dynamically handle JSONPath responses Signed-off-by: Tyler Ohlsen --- .../modals/input_transform_modal.tsx | 6 +- .../modals/output_transform_modal.tsx | 3 +- public/utils/utils.ts | 83 +++++++++++++++---- 3 files changed, 73 insertions(+), 19 deletions(-) diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx index 24218ac8..3081a421 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/input_transform_modal.tsx @@ -185,11 +185,13 @@ export function InputTransformModal(props: InputTransformModalProps) { Array.isArray(sampleSourceInput) ? generateArrayTransform( sampleSourceInput as [], - tempInputMap[selectedTransformOption] + tempInputMap[selectedTransformOption], + props.context ) : generateTransform( sampleSourceInput, - tempInputMap[selectedTransformOption] + tempInputMap[selectedTransformOption], + props.context ); setTransformedInput(customStringify(output)); diff --git a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx index c7e94475..b554b6f7 100644 --- a/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/processor_inputs/modals/output_transform_modal.tsx @@ -157,7 +157,8 @@ export function OutputTransformModal(props: OutputTransformModalProps) { sampleSourceOutput = JSON.parse(sourceOutput); const output = generateTransform( sampleSourceOutput, - reverseKeysAndValues(tempOutputMap[selectedTransformOption]) + reverseKeysAndValues(tempOutputMap[selectedTransformOption]), + props.context ); setTransformedOutput(customStringify(output)); } catch {} diff --git a/public/utils/utils.ts b/public/utils/utils.ts index 7dc9db9b..30bff43b 100644 --- a/public/utils/utils.ts +++ b/public/utils/utils.ts @@ -15,6 +15,7 @@ import { ModelInterface, ModelOutput, ModelOutputFormField, + PROCESSOR_CONTEXT, SimulateIngestPipelineDoc, SimulateIngestPipelineResponse, WORKFLOW_RESOURCE_TYPE, @@ -182,14 +183,19 @@ export function unwrapTransformedDocs( // ML inference processors will use standard dot notation or JSONPath depending on the input. // We follow the same logic here to generate consistent results. -export function generateTransform(input: {} | [], map: MapFormValue): {} { +export function generateTransform( + input: {} | [], + map: MapFormValue, + context: PROCESSOR_CONTEXT +): {} { let output = {}; map.forEach((mapEntry) => { try { const transformedResult = getTransformedResult( mapEntry, input, - mapEntry.value + mapEntry.value, + context ); output = { ...output, @@ -204,12 +210,16 @@ export function generateTransform(input: {} | [], map: MapFormValue): {} { // a single field value in the transformed output. // A specialty scenario for when configuring input on search response processors, one-to-one is false, // and the input is an array. -export function generateArrayTransform(input: [], map: MapFormValue): {}[] { +export function generateArrayTransform( + input: [], + map: MapFormValue, + context: PROCESSOR_CONTEXT +): {}[] { let output = [] as {}[]; map.forEach((mapEntry) => { try { const transformedResult = input.map((inputEntry) => - getTransformedResult(mapEntry, inputEntry, mapEntry.value) + getTransformedResult(mapEntry, inputEntry, mapEntry.value, context) ); output = { ...output, @@ -223,19 +233,60 @@ export function generateArrayTransform(input: [], map: MapFormValue): {}[] { function getTransformedResult( mapEntry: MapEntry, input: {}, - path: string + path: string, + context: PROCESSOR_CONTEXT ): any { - // Edge case: if the path is ".", it implies returning - // the entire value. This may happen if full_response_path=false - // and the input is the entire result with nothing else to parse out. - // get() does not cover this case, so we override manually. - return path === '.' - ? input - : mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) - ? // JSONPath transform - jsonpath.query(input, path) - : // Standard dot notation - get(input, path); + // Rregular dot notation can only be executed if 1/ the JSONPath selector is not explicitly defined, + // and 2/ it is in the context of ingest. For search request / search response parsing, + // it can only be JSONPath, due to backend parsing limitations. + if ( + !mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) && + context === PROCESSOR_CONTEXT.INGEST + ) { + // sub-edge case: if the path is ".", it implies returning + // the entire value. This may happen if full_response_path=false + // and the input is the entire result with nothing else to parse out. + // get() does not cover this case, so we override manually. + if (path === '.') { + return input; + } else { + return get(input, path); + } + } else { + // The backend sets a JSONPath setting ALWAYS_RETURN_LIST=false, which + // dynamically returns a list or single value, based on whether + // the path is definite or not. We try to mimic that with a + // custom fn isIndefiniteJsonPath(), since this setting, nor + // knowing if the path is definite or indefinite, is not exposed + // by any known jsonpath JS-based / NPM libraries. + // if found to be definite, we remove the outermost array, which + // will always be returned by default when running query(). + const isIndefinite = isIndefiniteJsonPath(path); + const res = jsonpath.query(input, path); + if (isIndefinite) { + return res; + } else { + return res[0]; + } + } +} + +// Indefinite/definite path defns: +// https://github.com/json-path/JsonPath?tab=readme-ov-file#what-is-returned-when +// Note this may not cover every use case, as the true definition requires low-level +// branch navigation of the path nodes, which is not exposed by this npm library. +// Hence, we do our best to cover the majority of use cases and common patterns. +function isIndefiniteJsonPath(path: string): boolean { + // regex has 3 overall OR checks: + // 1. consecutive '.'s, indicating deep scan - \.{2} + // 2. ?(), indicating an expression - \?\(.*\) + // 3. multiple array indices - \[\d+,\d+\] | \[.*:.*\] | \[\*\] + // if any are met, then we call the path indefinite. + const indefiniteRegEx = new RegExp( + /\.{2}|\?\(.*\)|\[\d+,\d+\]|\[.*:.*\]|\[\*\]/, + 'g' + ); + return indefiniteRegEx.test(path); } // Derive the collection of model inputs from the model interface JSONSchema into a form-ready list