Skip to content

Commit

Permalink
Dynamically handle JSONPath responses
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Ohlsen <[email protected]>
  • Loading branch information
ohltyler committed Nov 15, 2024
1 parent 697241e commit 01c4f35
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,13 @@ export function InputTransformModal(props: InputTransformModalProps) {
Array.isArray(sampleSourceInput)
? generateArrayTransform(
sampleSourceInput as [],
tempInputMap[selectedTransformOption]
tempInputMap[selectedTransformOption],
props.context
)
: generateTransform(
sampleSourceInput,
tempInputMap[selectedTransformOption]
tempInputMap[selectedTransformOption],
props.context
);

setTransformedInput(customStringify(output));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ export function OutputTransformModal(props: OutputTransformModalProps) {
sampleSourceOutput = JSON.parse(sourceOutput);
const output = generateTransform(
sampleSourceOutput,
reverseKeysAndValues(tempOutputMap[selectedTransformOption])
reverseKeysAndValues(tempOutputMap[selectedTransformOption]),
props.context
);
setTransformedOutput(customStringify(output));
} catch {}
Expand Down
83 changes: 67 additions & 16 deletions public/utils/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
ModelInterface,
ModelOutput,
ModelOutputFormField,
PROCESSOR_CONTEXT,
SimulateIngestPipelineDoc,
SimulateIngestPipelineResponse,
WORKFLOW_RESOURCE_TYPE,
Expand Down Expand Up @@ -182,14 +183,19 @@ export function unwrapTransformedDocs(

// ML inference processors will use standard dot notation or JSONPath depending on the input.
// We follow the same logic here to generate consistent results.
export function generateTransform(input: {} | [], map: MapFormValue): {} {
export function generateTransform(
input: {} | [],
map: MapFormValue,
context: PROCESSOR_CONTEXT
): {} {
let output = {};
map.forEach((mapEntry) => {
try {
const transformedResult = getTransformedResult(
mapEntry,
input,
mapEntry.value
mapEntry.value,
context
);
output = {
...output,
Expand All @@ -204,12 +210,16 @@ export function generateTransform(input: {} | [], map: MapFormValue): {} {
// a single field value in the transformed output.
// A specialty scenario for when configuring input on search response processors, one-to-one is false,
// and the input is an array.
export function generateArrayTransform(input: [], map: MapFormValue): {}[] {
export function generateArrayTransform(
input: [],
map: MapFormValue,
context: PROCESSOR_CONTEXT
): {}[] {
let output = [] as {}[];
map.forEach((mapEntry) => {
try {
const transformedResult = input.map((inputEntry) =>
getTransformedResult(mapEntry, inputEntry, mapEntry.value)
getTransformedResult(mapEntry, inputEntry, mapEntry.value, context)
);
output = {
...output,
Expand All @@ -223,19 +233,60 @@ export function generateArrayTransform(input: [], map: MapFormValue): {}[] {
function getTransformedResult(
mapEntry: MapEntry,
input: {},
path: string
path: string,
context: PROCESSOR_CONTEXT
): any {
// Edge case: if the path is ".", it implies returning
// the entire value. This may happen if full_response_path=false
// and the input is the entire result with nothing else to parse out.
// get() does not cover this case, so we override manually.
return path === '.'
? input
: mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR)
? // JSONPath transform
jsonpath.query(input, path)
: // Standard dot notation
get(input, path);
// Rregular dot notation can only be executed if 1/ the JSONPath selector is not explicitly defined,
// and 2/ it is in the context of ingest. For search request / search response parsing,
// it can only be JSONPath, due to backend parsing limitations.
if (
!mapEntry.value.startsWith(JSONPATH_ROOT_SELECTOR) &&
context === PROCESSOR_CONTEXT.INGEST
) {
// sub-edge case: if the path is ".", it implies returning
// the entire value. This may happen if full_response_path=false
// and the input is the entire result with nothing else to parse out.
// get() does not cover this case, so we override manually.
if (path === '.') {
return input;
} else {
return get(input, path);
}
} else {
// The backend sets a JSONPath setting ALWAYS_RETURN_LIST=false, which
// dynamically returns a list or single value, based on whether
// the path is definite or not. We try to mimic that with a
// custom fn isIndefiniteJsonPath(), since this setting, nor
// knowing if the path is definite or indefinite, is not exposed
// by any known jsonpath JS-based / NPM libraries.
// if found to be definite, we remove the outermost array, which
// will always be returned by default when running query().
const isIndefinite = isIndefiniteJsonPath(path);
const res = jsonpath.query(input, path);
if (isIndefinite) {
return res;
} else {
return res[0];
}
}
}

// Indefinite/definite path defns:
// https://github.com/json-path/JsonPath?tab=readme-ov-file#what-is-returned-when
// Note this may not cover every use case, as the true definition requires low-level
// branch navigation of the path nodes, which is not exposed by this npm library.
// Hence, we do our best to cover the majority of use cases and common patterns.
function isIndefiniteJsonPath(path: string): boolean {
// regex has 3 overall OR checks:
// 1. consecutive '.'s, indicating deep scan - \.{2}
// 2. ?(<anything>), indicating an expression - \?\(.*\)
// 3. multiple array indices - \[\d+,\d+\] | \[.*:.*\] | \[\*\]
// if any are met, then we call the path indefinite.
const indefiniteRegEx = new RegExp(
/\.{2}|\?\(.*\)|\[\d+,\d+\]|\[.*:.*\]|\[\*\]/,
'g'
);
return indefiniteRegEx.test(path);
}

// Derive the collection of model inputs from the model interface JSONSchema into a form-ready list
Expand Down

0 comments on commit 01c4f35

Please sign in to comment.