Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve consistency of JSONPath parsing & execution across frontend/backend #481

Merged
merged 4 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,10 @@ export enum PROCESSOR_CONTEXT {
SEARCH_REQUEST = 'search_request',
SEARCH_RESPONSE = 'search_response',
}
export enum TRANSFORM_CONTEXT {
INPUT = 'input',
OUTPUT = 'output',
}
export const START_FROM_SCRATCH_WORKFLOW_NAME = 'Start From Scratch';
export const DEFAULT_NEW_WORKFLOW_NAME = 'new_workflow';
export const DEFAULT_NEW_WORKFLOW_DESCRIPTION = 'My new workflow';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import {
IConfigField,
PROCESSOR_CONTEXT,
WorkflowConfig,
JSONPATH_ROOT_SELECTOR,
WorkflowFormValues,
ModelInterface,
IndexMappings,
Expand All @@ -41,6 +40,7 @@ import {
} from './modals';
import { AppState, getMappings, useAppDispatch } from '../../../../store';
import {
convertArrayAccessorsToBracketNotation,
formikToPartialPipeline,
getDataSourceId,
parseModelInputs,
Expand Down Expand Up @@ -186,7 +186,7 @@ export function MLProcessorInputs(props: MLProcessorInputsProps) {
setDocFields(
docObjKeys.map((key) => {
return {
label: key,
label: convertArrayAccessorsToBracketNotation(key),
};
})
);
Expand All @@ -202,7 +202,7 @@ export function MLProcessorInputs(props: MLProcessorInputsProps) {
setQueryFields(
queryObjKeys.map((key) => {
return {
label: key,
label: convertArrayAccessorsToBracketNotation(key),
};
})
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import {
PROCESSOR_CONTEXT,
SearchHit,
SimulateIngestPipelineResponse,
TRANSFORM_CONTEXT,
WorkflowConfig,
WorkflowFormValues,
customStringify,
Expand Down Expand Up @@ -185,11 +186,15 @@ export function InputTransformModal(props: InputTransformModalProps) {
Array.isArray(sampleSourceInput)
? generateArrayTransform(
sampleSourceInput as [],
tempInputMap[selectedTransformOption]
tempInputMap[selectedTransformOption],
props.context,
TRANSFORM_CONTEXT.INPUT
)
: generateTransform(
sampleSourceInput,
tempInputMap[selectedTransformOption]
tempInputMap[selectedTransformOption],
props.context,
TRANSFORM_CONTEXT.INPUT
);

setTransformedInput(customStringify(output));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import {
SearchHit,
SearchPipelineConfig,
SimulateIngestPipelineResponse,
TRANSFORM_CONTEXT,
WorkflowConfig,
WorkflowFormValues,
customStringify,
Expand Down Expand Up @@ -157,7 +158,9 @@ export function OutputTransformModal(props: OutputTransformModalProps) {
sampleSourceOutput = JSON.parse(sourceOutput);
const output = generateTransform(
sampleSourceOutput,
reverseKeysAndValues(tempOutputMap[selectedTransformOption])
reverseKeysAndValues(tempOutputMap[selectedTransformOption]),
props.context,
TRANSFORM_CONTEXT.OUTPUT
);
setTransformedOutput(customStringify(output));
} catch {}
Expand Down
103 changes: 87 additions & 16 deletions public/utils/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ import {
ModelInterface,
ModelOutput,
ModelOutputFormField,
PROCESSOR_CONTEXT,
SimulateIngestPipelineDoc,
SimulateIngestPipelineResponse,
TRANSFORM_CONTEXT,
WORKFLOW_RESOURCE_TYPE,
WORKFLOW_STEP_TYPE,
Workflow,
Expand Down Expand Up @@ -182,14 +184,21 @@ export function unwrapTransformedDocs(

// ML inference processors will use standard dot notation or JSONPath depending on the input.
// We follow the same logic here to generate consistent results.
export function generateTransform(input: {} | [], map: MapFormValue): {} {
export function generateTransform(
input: {} | [],
map: MapFormValue,
context: PROCESSOR_CONTEXT,
transformContext: TRANSFORM_CONTEXT
): {} {
let output = {};
map.forEach((mapEntry) => {
try {
const transformedResult = getTransformedResult(
mapEntry,
input,
mapEntry.value
mapEntry.value,
context,
transformContext
);
output = {
...output,
Expand All @@ -204,12 +213,23 @@ export function generateTransform(input: {} | [], map: MapFormValue): {} {
// a single field value in the transformed output.
// A specialty scenario for when configuring input on search response processors, one-to-one is false,
// and the input is an array.
export function generateArrayTransform(input: [], map: MapFormValue): {}[] {
export function generateArrayTransform(
input: [],
map: MapFormValue,
context: PROCESSOR_CONTEXT,
transformContext: TRANSFORM_CONTEXT
): {}[] {
let output = [] as {}[];
map.forEach((mapEntry) => {
try {
const transformedResult = input.map((inputEntry) =>
getTransformedResult(mapEntry, inputEntry, mapEntry.value)
getTransformedResult(
mapEntry,
inputEntry,
mapEntry.value,
context,
transformContext
)
);
output = {
...output,
Expand All @@ -223,19 +243,62 @@ export function generateArrayTransform(input: [], map: MapFormValue): {}[] {
function getTransformedResult(
mapEntry: MapEntry,
input: {},
path: string
path: string,
context: PROCESSOR_CONTEXT,
transformContext: TRANSFORM_CONTEXT
): any {
// Edge case: if the path is ".", it implies returning
// the entire value. This may happen if full_response_path=false
// and the input is the entire result with nothing else to parse out.
// get() does not cover this case, so we override manually.
return path === '.'
? input
: mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR)
? // JSONPath transform
jsonpath.query(input, path)
: // Standard dot notation
get(input, path);
// Regular dot notation can only be executed if 1/ the JSONPath selector is not explicitly defined,
// and 2/ it is in the context of ingest, and 3/ it is transforming the input (the source document).
// For all other scenarios, it can only be JSONPath, due to backend parsing limitations.
if (
!mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) &&
context === PROCESSOR_CONTEXT.INGEST &&
transformContext === TRANSFORM_CONTEXT.INPUT
) {
// sub-edge case: if the path is ".", it implies returning
// the entire value. This may happen if full_response_path=false
// and the input is the entire result with nothing else to parse out.
// get() does not cover this case, so we override manually.
if (path === '.') {
return input;
} else {
return get(input, path);
}
} else {
// The backend sets a JSONPath setting ALWAYS_RETURN_LIST=false, which
// dynamically returns a list or single value, based on whether
// the path is definite or not. We try to mimic that with a
// custom fn isIndefiniteJsonPath(), since this setting, nor
// knowing if the path is definite or indefinite, is not exposed
// by any known jsonpath JS-based / NPM libraries.
// if found to be definite, we remove the outermost array, which
// will always be returned by default when running query().
const isIndefinite = isIndefiniteJsonPath(path);
const res = jsonpath.query(input, path);
if (isIndefinite) {
return res;
} else {
return res[0];
}
}
}

// Indefinite/definite path defns:
// https://github.com/json-path/JsonPath?tab=readme-ov-file#what-is-returned-when
// Note this may not cover every use case, as the true definition requires low-level
// branch navigation of the path nodes, which is not exposed by this npm library.
// Hence, we do our best to cover the majority of use cases and common patterns.
function isIndefiniteJsonPath(path: string): boolean {
// regex has 3 overall OR checks:
// 1. consecutive '.'s, indicating deep scan - \.{2}
// 2. ?(<anything>), indicating an expression - \?\(.*\)
// 3. multiple array indices - \[\d+,\d+\] | \[.*:.*\] | \[\*\]
// if any are met, then we call the path indefinite.
const indefiniteRegEx = new RegExp(
/\.{2}|\?\(.*\)|\[\d+,\d+\]|\[.*:.*\]|\[\*\]/,
'g'
);
return indefiniteRegEx.test(path);
}

// Derive the collection of model inputs from the model interface JSONSchema into a form-ready list
Expand Down Expand Up @@ -422,3 +485,11 @@ export const getErrorMessageForStepType = (
return '';
}
};

export function convertArrayAccessorsToBracketNotation(path: string): string {
return path.split('.').reduce((prevValue, curValue, idx) => {
return isNaN(parseInt(curValue))
? prevValue + '.' + curValue
: prevValue + `[${curValue}]`;
});
}
Loading