From 9b8c5166bace2376b5ef3c8d4aefc686736bcbb5 Mon Sep 17 00:00:00 2001 From: Rico Kahler Date: Wed, 19 Feb 2025 20:24:23 -0600 Subject: [PATCH 1/2] feat(evaluator): simplified synchronous evaluator --- .vscode/launch.json | 20 + package-lock.json | 22 + package.json | 1 + src/1.ts | 22 +- src/evaluator/constantEvaluate.ts | 55 -- src/evaluator/equality.ts | 22 +- src/evaluator/evaluate.ts | 634 ++++++----------- src/evaluator/functions.ts | 1077 ++++++++++++----------------- src/evaluator/index.ts | 3 +- src/evaluator/matching.ts | 33 +- src/evaluator/operators.ts | 291 ++++---- src/evaluator/ordering.ts | 74 +- src/evaluator/pt.ts | 47 -- src/evaluator/scope.ts | 39 -- src/evaluator/scoring.ts | 104 +-- src/evaluator/types.ts | 70 +- src/parser.ts | 43 +- src/typeEvaluator/index.ts | 19 +- src/typeEvaluator/matching.ts | 10 +- src/values/Path.ts | 36 - src/values/StreamValue.ts | 76 -- src/values/dateHelpers.ts | 37 - src/values/index.ts | 4 - src/values/types.ts | 40 -- src/values/utils.ts | 137 ---- test/evaluate.test.ts | 170 ++--- 26 files changed, 1058 insertions(+), 2028 deletions(-) create mode 100644 .vscode/launch.json delete mode 100644 src/evaluator/constantEvaluate.ts delete mode 100644 src/evaluator/pt.ts delete mode 100644 src/evaluator/scope.ts delete mode 100644 src/values/Path.ts delete mode 100644 src/values/StreamValue.ts delete mode 100644 src/values/dateHelpers.ts delete mode 100644 src/values/index.ts delete mode 100644 src/values/types.ts delete mode 100644 src/values/utils.ts diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..5b335e9b --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,20 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug Current Tape Test", + "type": "node", + "request": "launch", + "runtimeExecutable": "node", + "args": [ + "${workspaceFolder}/node_modules/.bin/tap", + "--no-timeout", + "--coverage-report=html", + "--no-browser", + "${file}" + ], + "console": 
"integratedTerminal", + "internalConsoleOptions": "openOnSessionStart" + } + ] +} diff --git a/package-lock.json b/package-lock.json index 30a6b42f..42fd382d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.15.0", "license": "MIT", "dependencies": { + "@portabletext/toolkit": "^2.0.17", "debug": "^4.3.4" }, "devDependencies": { @@ -1697,6 +1698,27 @@ "node": ">=12" } }, + "node_modules/@portabletext/toolkit": { + "version": "2.0.17", + "resolved": "https://registry.npmjs.org/@portabletext/toolkit/-/toolkit-2.0.17.tgz", + "integrity": "sha512-5wj+oUaCmHm9Ay1cytPmT1Yc0SrR1twwUIc0qNQ3MtaXaNMPw99Gjt1NcA34yfyKmEf/TAB2NiiT72jFxdddIQ==", + "license": "MIT", + "dependencies": { + "@portabletext/types": "^2.0.13" + }, + "engines": { + "node": "^14.13.1 || >=16.0.0" + } + }, + "node_modules/@portabletext/types": { + "version": "2.0.13", + "resolved": "https://registry.npmjs.org/@portabletext/types/-/types-2.0.13.tgz", + "integrity": "sha512-5xk5MSyQU9CrDho3Rsguj38jhijhD36Mk8S6mZo3huv6PM+t4M/5kJN2KFIxgvt4ONpvOEs1pVIZAV0cL0Vi+Q==", + "license": "MIT", + "engines": { + "node": "^14.13.1 || >=16.0.0 || >=18.0.0" + } + }, "node_modules/@rollup/plugin-alias": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/@rollup/plugin-alias/-/plugin-alias-5.1.1.tgz", diff --git a/package.json b/package.json index 1398a0ce..17d3fa53 100644 --- a/package.json +++ b/package.json @@ -65,6 +65,7 @@ ] }, "dependencies": { + "@portabletext/toolkit": "^2.0.17", "debug": "^4.3.4" }, "devDependencies": { diff --git a/src/1.ts b/src/1.ts index 8d258720..dafcee38 100644 --- a/src/1.ts +++ b/src/1.ts @@ -1,32 +1,14 @@ -export {evaluate} from './evaluator' +export {evaluateQuery as evaluate} from './evaluator' export type {GroqFunction, GroqFunctionArg, GroqPipeFunction} from './evaluator/functions' -export type {Scope} from './evaluator/scope' export type { Context, DereferenceFunction, Document, - EvaluateOptions, - Executor, + EvaluateQueryOptions as 
EvaluateOptions, } from './evaluator/types' export * from './nodeTypes' export {parse} from './parser' export type {ParseOptions} from './types' -export type { - AnyStaticValue, - ArrayValue, - BooleanValue, - DateTimeValue, - GroqType, - NullValue, - NumberValue, - ObjectValue, - PathValue, - StaticValue, - StreamValue, - StringValue, - Value, -} from './values' -export {DateTime, Path} from './values' // Type evaluation export type * from './typeEvaluator' diff --git a/src/evaluator/constantEvaluate.ts b/src/evaluator/constantEvaluate.ts deleted file mode 100644 index 6890b8cb..00000000 --- a/src/evaluator/constantEvaluate.ts +++ /dev/null @@ -1,55 +0,0 @@ -import type {ExprNode} from '../nodeTypes' -import {NULL_VALUE, type Value} from '../values' -import {evaluate} from './evaluate' -import {Scope} from './scope' - -function canConstantEvaluate(node: ExprNode): boolean { - switch (node.type) { - case 'Group': - return canConstantEvaluate(node.base) - case 'Value': - case 'Parameter': - return true - case 'Pos': - case 'Neg': - return canConstantEvaluate(node.base) - case 'OpCall': - switch (node.op) { - case '+': - case '-': - case '*': - case '/': - case '%': - case '**': - return canConstantEvaluate(node.left) && canConstantEvaluate(node.right) - default: - return false - } - default: - return false - } -} - -const DUMMY_SCOPE = new Scope( - {}, - NULL_VALUE, - NULL_VALUE, - {timestamp: new Date(0), identity: 'me', before: null, after: null}, - null, -) - -export function tryConstantEvaluate(node: ExprNode): Value | null { - if (!canConstantEvaluate(node)) { - return null - } - - return constantEvaluate(node) -} - -function constantEvaluate(node: ExprNode): Value { - const value = evaluate(node, DUMMY_SCOPE, constantEvaluate) - if ('then' in value) { - throw new Error('BUG: constant evaluate should never return a promise') - } - return value -} diff --git a/src/evaluator/equality.ts b/src/evaluator/equality.ts index 111cda6b..fd49a700 100644 --- 
a/src/evaluator/equality.ts +++ b/src/evaluator/equality.ts @@ -1,17 +1,17 @@ -import type {Value} from '../values' +import {isIso8601} from './evaluate' -export function isEqual(a: Value, b: Value): boolean { - if ( - (a.type === 'string' && b.type === 'string') || - (a.type === 'boolean' && b.type === 'boolean') || - (a.type === 'null' && b.type === 'null') || - (a.type === 'number' && b.type === 'number') - ) { - return a.data === b.data +export function isEqual(a: unknown = null, b: unknown = null): boolean { + if (isIso8601(a) && isIso8601(b)) { + return new Date(a).getTime() === new Date(b).getTime() } - if (a.type === 'datetime' && b.type === 'datetime') { - return a.data.equals(b.data) + if ( + (a === null && b === null) || + (typeof a === 'string' && typeof b === 'string') || + (typeof a === 'boolean' && typeof b === 'boolean') || + (typeof a === 'number' && typeof b === 'number') + ) { + return a === b } return false diff --git a/src/evaluator/evaluate.ts b/src/evaluator/evaluate.ts index 88174b30..76ac2067 100644 --- a/src/evaluator/evaluate.ts +++ b/src/evaluator/evaluate.ts @@ -1,485 +1,303 @@ -import type {ExprNode, FuncCallNode, PipeFuncCallNode} from '../nodeTypes' -import { - FALSE_VALUE, - fromJS, - fromNumber, - NULL_VALUE, - StreamValue, - TRUE_VALUE, - type Value, -} from '../values' -import {operators} from './operators' -import {partialCompare} from './ordering' -import {Scope} from './scope' -import type {EvaluateOptions, Executor} from './types' - -export function evaluate( - node: ExprNode, - scope: Scope, - execute: Executor = evaluate, -): Value | PromiseLike { - const func = EXECUTORS[node.type] - return func(node as any, scope, execute) +/* eslint-disable max-statements */ +/* eslint-disable complexity */ +import type {ExprNode} from '../nodeTypes' +import {evaluateOpCall} from './operators' +import {compare} from './ordering' +import type {Context, EvaluateQueryOptions} from './types' + +interface EvaluateOptions extends Context { + 
node: ExprNode } -type NarrowNode = T extends {type: N} ? T : never +const iso8601Regex = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})$/ +export const isIso8601 = (str: unknown): str is string => + typeof str === 'string' && iso8601Regex.test(str) -type ExecutorMap = { - [key in ExprNode['type']]: ( - node: NarrowNode, - scope: Scope, - exec: Executor, - ) => Value | PromiseLike -} - -/** - * Applies the function to a value, but tries to avoid creating unnecessary promises. - */ -function promiselessApply( - value: Value | PromiseLike, - cb: (val: Value) => Value, -): Value | PromiseLike { - if ('then' in value) { - return value.then(cb) - } - - return cb(value) -} - -const EXECUTORS: ExecutorMap = { - This(_, scope) { - return scope.value - }, - - Selector() { - // These should be evaluated separely using a different evaluator. - // At the mooment we haven't implemented this. - throw new Error('Selectors can not be evaluated') - }, - - Everything(_, scope) { - return scope.source - }, - - Parameter({name}, scope) { - return fromJS(scope.params[name]) - }, - - Context({key}, scope) { - if (key === 'before' || key === 'after') { - const value = scope.context[key] - return value || NULL_VALUE - } - throw new Error(`unknown context key: ${key}`) - }, - - Parent({n}, scope) { - let current = scope - for (let i = 0; i < n; i++) { - if (!current.parent) { - return NULL_VALUE - } - - current = current.parent +export function evaluate({node, ...context}: EvaluateOptions): unknown { + switch (node.type) { + case 'This': { + return context.scope.at(-1) } - return current.value - }, - OpCall({op, left, right}, scope, execute) { - const func = operators[op] - if (!func) { - throw new Error(`Unknown operator: ${op}`) + case 'Selector': { + // These should be evaluated separately using a different evaluator. + // At the moment we haven't implemented this. 
+ throw new Error('Selectors can not be evaluated') } - const leftValue = execute(left, scope) - const rightValue = execute(right, scope) - // Avoid uneccesary promises - // This is required for constant evaluation to work correctly. - if ('then' in leftValue || 'then' in rightValue) { - return (async () => func(await leftValue, await rightValue))() + case 'Everything': { + return context.scope.at(0) } - return func(leftValue, rightValue) - }, - - async Select({alternatives, fallback}, scope, execute) { - for (const alt of alternatives) { - const altCond = await execute(alt.condition, scope) - if (altCond.type === 'boolean' && altCond.data === true) { - return execute(alt.value, scope) - } + case 'Parameter': { + return context.params?.[node.name] ?? null } - if (fallback) { - return execute(fallback, scope) + case 'Context': { + if (node.key === 'before' || node.key === 'after') { + return context[node.key] ?? null + } + throw new Error(`Unknown context key: ${node.key}`) } - return NULL_VALUE - }, - - async InRange({base, left, right, isInclusive}, scope, execute) { - const value = await execute(base, scope) - const leftValue = await execute(left, scope) - const rightValue = await execute(right, scope) - - const leftCmp = partialCompare(await value.get(), await leftValue.get()) - if (leftCmp === null) { - return NULL_VALUE - } - const rightCmp = partialCompare(await value.get(), await rightValue.get()) - if (rightCmp === null) { - return NULL_VALUE + case 'Parent': { + return context.scope.at(-node.n) ?? null } - if (isInclusive) { - return leftCmp >= 0 && rightCmp <= 0 ? TRUE_VALUE : FALSE_VALUE + case 'OpCall': { + return evaluateOpCall({...context, node}) } - return leftCmp >= 0 && rightCmp < 0 ? 
TRUE_VALUE : FALSE_VALUE - }, - - async Filter({base, expr}, scope, execute) { - const baseValue = await execute(base, scope) - if (!baseValue.isArray()) { - return NULL_VALUE - } - return new StreamValue(async function* () { - for await (const elem of baseValue) { - const newScope = scope.createNested(elem) - const exprValue = await execute(expr, newScope) - if (exprValue.type === 'boolean' && exprValue.data === true) { - yield elem + case 'Select': { + for (const alternative of node.alternatives) { + if (evaluate({...context, node: alternative.condition}) === true) { + return evaluate({...context, node: alternative.value}) } } - }) - }, - - async Projection({base, expr}, scope, execute) { - const baseValue = await execute(base, scope) - if (baseValue.type !== 'object') { - return NULL_VALUE + if (node.fallback) return evaluate({...context, node: node.fallback}) + return null } - const newScope = scope.createNested(baseValue) - return execute(expr, newScope) - }, - - FuncCall({func, args}: FuncCallNode, scope: Scope, execute) { - return func(args, scope, execute) - }, + case 'InRange': { + const base = evaluate({...context, node: node.base}) + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) - async PipeFuncCall({func, base, args}: PipeFuncCallNode, scope: Scope, execute) { - const baseValue = await execute(base, scope) - return func(baseValue, args, scope, execute) - }, - - async AccessAttribute({base, name}, scope, execute) { - let value = scope.value - if (base) { - value = await execute(base, scope) - } - if (value.type === 'object') { - if (value.data.hasOwnProperty(name)) { - return fromJS(value.data[name]) + try { + if (node.isInclusive) { + return compare(base, left) >= 0 && compare(base, right) <= 0 + } + return compare(base, left) >= 0 && compare(base, right) < 0 + } catch { + return null } } - return NULL_VALUE - }, - - async AccessElement({base, index}, scope, execute) { - const baseValue = 
await execute(base, scope) - if (!baseValue.isArray()) { - return NULL_VALUE + case 'Filter': { + const base = evaluate({...context, node: node.base}) + if (!Array.isArray(base)) return null + return base.filter((item) => + evaluate({ + ...context, + node: node.expr, + scope: [...context.scope, item], + }), + ) } - const data = await baseValue.get() - const finalIndex = index < 0 ? index + data.length : index - return fromJS(data[finalIndex]) - }, - - async Slice({base, left, right, isInclusive}, scope, execute) { - const baseValue = await execute(base, scope) - - if (!baseValue.isArray()) { - return NULL_VALUE + case 'Projection': { + const base = evaluate({...context, node: node.base}) + if (typeof base !== 'object' || !base) return null + return evaluate({ + ...context, + node: node.expr, + scope: [...context.scope, base], + }) } - // OPT: Here we can optimize when either indices are >= 0 - const array = (await baseValue.get()) as any[] - - let leftIdx = left - let rightIdx = right - - // Handle negative index - if (leftIdx < 0) { - leftIdx = array.length + leftIdx - } - if (rightIdx < 0) { - rightIdx = array.length + rightIdx + case 'FuncCall': { + return node.func({args: node.args, ...context}) } - // Convert from inclusive to exclusive index - if (isInclusive) { - rightIdx++ + case 'PipeFuncCall': { + return node.func({ + ...context, + base: evaluate({...context, node: node.base}), + args: node.args, + }) } - if (leftIdx < 0) { - leftIdx = 0 - } - if (rightIdx < 0) { - rightIdx = 0 + case 'AccessAttribute': { + const value = node.base ? evaluate({...context, node: node.base}) : context.scope.at(-1) + if (typeof value === 'object' && !!value && node.name in value) { + return value[node.name as keyof typeof value] + } + return null } - // Note: At this point the indices might point out-of-bound, but - // .slice handles this correctly. 
- - return fromJS(array.slice(leftIdx, rightIdx)) - }, - - async Deref({base}, scope, execute) { - const value = await execute(base, scope) - - if (!scope.source.isArray()) { - return NULL_VALUE + case 'AccessElement': { + const base = evaluate({...context, node: node.base}) + if (!Array.isArray(base)) return null + return base.at(node.index) } - if (value.type !== 'object') { - return NULL_VALUE + case 'Slice': { + const base = evaluate({...context, node: node.base}) + if (!Array.isArray(base)) return null + return base.slice(node.left, node.isInclusive ? node.right + 1 : node.right) } - const id = value.data['_ref'] - if (typeof id !== 'string') { - return NULL_VALUE - } + case 'Deref': { + const base = evaluate({...context, node: node.base}) + const root = context.scope.at(0) + if (!Array.isArray(root)) return null + if (typeof base !== 'object' || !base) return null + if (!('_ref' in base) || typeof base._ref !== 'string') return null - if (scope.context.dereference) { - return fromJS(await scope.context.dereference({_ref: id})) + return root.find( + (doc: unknown) => + typeof doc === 'object' && + !!doc && + '_id' in doc && + typeof doc._id === 'string' && + doc._id === base._ref, + ) } - for await (const doc of scope.source) { - if (doc.type === 'object' && id === doc.data['_id']) { - return doc - } + case 'Value': { + return node.value } - return NULL_VALUE - }, - - Value({value}) { - return fromJS(value) - }, - - Group({base}, scope, execute) { - return execute(base, scope) - }, - - async Object({attributes}, scope, execute) { - const result: {[key: string]: any} = {} - for (const attr of attributes) { - const attrType = attr.type - switch (attr.type) { - case 'ObjectAttributeValue': { - const value = await execute(attr.value, scope) - result[attr.name] = await value.get() - break - } + case 'Group': { + return evaluate({...context, node: node.base}) + } - case 'ObjectConditionalSplat': { - const cond = await execute(attr.condition, scope) - if (cond.type 
!== 'boolean' || cond.data === false) { - continue + case 'Object': { + return node.attributes.reduce>((acc, attribute) => { + switch (attribute.type) { + case 'ObjectAttributeValue': { + const value = evaluate({...context, node: attribute.value}) + if (value !== undefined) { + acc[attribute.name] = value + } + return acc } - const value = await execute(attr.value, scope) - if (value.type === 'object') { - Object.assign(result, value.data) + case 'ObjectConditionalSplat': { + if (evaluate({...context, node: attribute.condition}) === true) { + const value = evaluate({...context, node: attribute.value}) + if (typeof value === 'object' && !!value) { + Object.assign(acc, value) + } + } + return acc } - break - } - case 'ObjectSplat': { - const value = await execute(attr.value, scope) - if (value.type === 'object') { - Object.assign(result, value.data) + case 'ObjectSplat': { + const value = evaluate({...context, node: attribute.value}) + if (typeof value === 'object' && !!value) { + Object.assign(acc, value) + } + return acc } - break - } - default: - throw new Error(`Unknown node type: ${attrType}`) - } - } - return fromJS(result) - }, - - Array({elements}, scope, execute) { - return new StreamValue(async function* () { - for (const element of elements) { - const value = await execute(element.value, scope) - if (element.isSplat) { - if (value.isArray()) { - for await (const v of value) { - yield v - } + default: { + throw new Error( + `Unknown node type: ${ + // @ts-expect-error this type should not exist + attribute.type + }`, + ) } - } else { - yield value } - } - }) - }, - - Tuple() { - throw new Error('tuples can not be evaluated') - }, - - async Or({left, right}, scope, execute) { - const leftValue = await execute(left, scope) - const rightValue = await execute(right, scope) - - if (leftValue.type === 'boolean') { - if (leftValue.data === true) { - return TRUE_VALUE - } + }, {}) } - if (rightValue.type === 'boolean') { - if (rightValue.data === true) { - return 
TRUE_VALUE - } + case 'Array': { + return node.elements.flatMap((element) => { + const value = evaluate({...context, node: element.value}) + if (element.isSplat) return Array.isArray(value) ? value : [] + return value + }) } - if (leftValue.type !== 'boolean' || rightValue.type !== 'boolean') { - return NULL_VALUE + case 'Tuple': { + throw new Error('tuples can not be evaluated') } - return FALSE_VALUE - }, - - async And({left, right}, scope, execute) { - const leftValue = await execute(left, scope) - const rightValue = await execute(right, scope) - - if (leftValue.type === 'boolean') { - if (leftValue.data === false) { - return FALSE_VALUE - } + case 'Or': { + const left = evaluate({...context, node: node.left}) + if (left === true) return true + const right = evaluate({...context, node: node.right}) + if (right === true) return true + if (typeof left !== 'boolean' || typeof right !== 'boolean') return null + return false } - if (rightValue.type === 'boolean') { - if (rightValue.data === false) { - return FALSE_VALUE - } + case 'And': { + const left = evaluate({...context, node: node.left}) + if (left === false) return false + const right = evaluate({...context, node: node.right}) + if (right === false) return false + if (typeof left !== 'boolean' || typeof right !== 'boolean') return null + return true } - if (leftValue.type !== 'boolean' || rightValue.type !== 'boolean') { - return NULL_VALUE + case 'Not': { + const base = evaluate({...context, node: node.base}) + if (typeof base !== 'boolean') return null + return base } - return TRUE_VALUE - }, + case 'Neg': { + const base = evaluate({...context, node: node.base}) + if (typeof base !== 'number') return null + return -base + } - async Not({base}, scope, execute) { - const value = await execute(base, scope) - if (value.type !== 'boolean') { - return NULL_VALUE + case 'Pos': { + const base = evaluate({...context, node: node.base}) + if (typeof base !== 'number') return null + return base } - return value.data ? 
FALSE_VALUE : TRUE_VALUE - }, - Neg({base}, scope, execute) { - return promiselessApply(execute(base, scope), (value) => { - if (value.type !== 'number') { - return NULL_VALUE - } - return fromNumber(-value.data) - }) - }, - - Pos({base}, scope, execute) { - return promiselessApply(execute(base, scope), (value) => { - if (value.type !== 'number') { - return NULL_VALUE - } - return fromNumber(value.data) - }) - }, + case 'Asc': { + return null + } - Asc() { - return NULL_VALUE - }, + case 'Desc': { + return null + } - Desc() { - return NULL_VALUE - }, + case 'ArrayCoerce': { + const base = evaluate({...context, node: node.base}) + if (Array.isArray(base)) return base + return null + } - async ArrayCoerce({base}, scope, execute) { - const value = await execute(base, scope) - return value.isArray() ? value : NULL_VALUE - }, + case 'Map': { + const base = evaluate({...context, node: node.base}) + if (!Array.isArray(base)) return null + return base.map((item) => + evaluate({ + ...context, + node: node.expr, + scope: [...context.scope.slice(0, -1), item], + }), + ) + } - async Map({base, expr}, scope, execute) { - const value = await execute(base, scope) - if (!value.isArray()) { - return NULL_VALUE + case 'FlatMap': { + const base = evaluate({...context, node: node.base}) + if (!Array.isArray(base)) return null + return base.flatMap((item) => { + const child = evaluate({ + ...context, + node: node.expr, + scope: [...context.scope.slice(0, -1), item], + }) + if (Array.isArray(child)) return child + return [child] + }) } - return new StreamValue(async function* () { - for await (const elem of value) { - const newScope = scope.createHidden(elem) - yield await execute(expr, newScope) - } - }) - }, - - async FlatMap({base, expr}, scope, execute) { - const value = await execute(base, scope) - if (!value.isArray()) { - return NULL_VALUE - } - - return new StreamValue(async function* () { - for await (const elem of value) { - const newScope = scope.createHidden(elem) - const 
innerValue = await execute(expr, newScope) - if (innerValue.isArray()) { - for await (const inner of innerValue) { - yield inner - } - } else { - yield innerValue - } - } - }) - }, + default: { + throw new Error( + `Unrecognized node type: ${ + // @ts-expect-error should be of type `never` since this is a fallback + node?.type + }`, + ) + } + } } -/** - * Evaluates a query. - * @internal - */ -export function evaluateQuery( - tree: ExprNode, - options: EvaluateOptions = {}, -): Value | PromiseLike { - const root = fromJS(options.root) - const dataset = fromJS(options.dataset) - const params: {[key: string]: any} = {...options.params} - - const scope = new Scope( - params, - dataset, - root, - { - timestamp: options.timestamp || new Date(), - identity: options.identity === undefined ? 'me' : options.identity, - sanity: options.sanity, - after: options.after ? fromJS(options.after) : null, - before: options.before ? fromJS(options.before) : null, - dereference: options.dereference, - }, - null, - ) - return evaluate(tree, scope) +export function evaluateQuery(tree: ExprNode, options: EvaluateQueryOptions = {}): unknown { + return evaluate({ + identity: options.identity ?? 'me', + scope: [options.dataset ?? 
[]], + timestamp: new Date().toISOString(), + ...options, + node: tree, + }) } diff --git a/src/evaluator/functions.ts b/src/evaluator/functions.ts index 65f07c49..88eaeb84 100644 --- a/src/evaluator/functions.ts +++ b/src/evaluator/functions.ts @@ -1,47 +1,29 @@ +import {toPlainText} from '@portabletext/toolkit' +import type {ArbitraryTypedObject, PortableTextBlock} from '@portabletext/types' + import type {ExprNode} from '../nodeTypes' -import { - DateTime, - FALSE_VALUE, - fromDateTime, - fromJS, - fromNumber, - fromPath, - fromString, - getType, - NULL_VALUE, - Path, - StreamValue, - TRUE_VALUE, - type Value, -} from '../values' -import {totalCompare} from './ordering' -import {portableTextContent} from './pt' -import {Scope} from './scope' +import {evaluate, isIso8601} from './evaluate' +import {compare, getTypeRank} from './ordering' import {evaluateScore} from './scoring' -import type {Executor} from './types' -import {isEqual} from './equality' - -function hasReference(value: any, pathSet: Set): boolean { - switch (getType(value)) { - case 'array': - for (const v of value) { - if (hasReference(v, pathSet)) { - return true - } - } - break - case 'object': - if (value._ref) { - return pathSet.has(value._ref) - } - for (const v of Object.values(value)) { - if (hasReference(v, pathSet)) { - return true - } - } - break - default: +import type {Context} from './types' + +function hasReference(value: unknown, paths: Set): boolean { + if (Array.isArray(value)) { + for (const child of value) { + if (hasReference(child, paths)) return true + } + } + + if (typeof value !== 'object' || !value) return false + + if ('_ref' in value && typeof value._ref === 'string') { + return paths.has(value._ref) } + + for (const child of Object.values(value)) { + if (hasReference(child, paths)) return true + } + return false } @@ -60,6 +42,16 @@ function countUTF8(str: string): number { return count } +function createStub({arity, mode}: WithOptions) { + function notImplemented(): 
never { + throw new Error('Not implemented') + } + return Object.assign(notImplemented, { + ...(arity && {arity}), + ...(mode && {mode}), + }) +} + /** @public */ export type GroqFunctionArg = ExprNode type WithOptions = T & { @@ -70,696 +62,501 @@ type WithOptions = T & { export type GroqFunctionArity = number | ((count: number) => boolean) /** @public */ -export type GroqFunction = ( - args: GroqFunctionArg[], - scope: Scope, - execute: Executor, -) => PromiseLike +export interface GroqFunctionOptions extends Context { + args: GroqFunctionArg[] +} + +/** @public */ +export type GroqFunction = (options: GroqFunctionOptions) => unknown export type FunctionSet = Record | undefined> export type NamespaceSet = Record -// underscored to not collide with environments like jest that give variables named `global` special treatment -const _global: FunctionSet = {} +export type GroqPipeFunctionOptions = GroqFunctionOptions & {base: unknown} +export type GroqPipeFunction = (options: GroqPipeFunctionOptions) => unknown +type ObjectWithScore = Record & {_score: number} -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -_global['anywhere'] = async function anywhere() { +function anywhere(): never { throw new Error('not implemented') } +anywhere.arity = 1 -_global['anywhere'].arity = 1 - -_global['coalesce'] = async function coalesce(args, scope, execute) { +function coalesce({args, ...context}: GroqFunctionOptions): unknown { for (const arg of args) { - const value = await execute(arg, scope) - if (value.type !== 'null') { - return value - } + const value = evaluate({...context, node: arg}) + if (value !== null && value !== undefined) return value } - return NULL_VALUE + return null } +coalesce.arity = 1 -_global['count'] = async function count(args, scope, execute) { - const inner = await execute(args[0], scope) - if (!inner.isArray()) { - return NULL_VALUE - } - - let num = 0 - // eslint-disable-next-line @typescript-eslint/no-unused-vars - 
for await (const _ of inner) { - num++ - } - return fromNumber(num) +function count({args: [arg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: arg}) + if (!Array.isArray(base)) return null + return base.length } -_global['count'].arity = 1 +count.arity = 1 -_global['dateTime'] = async function dateTime(args, scope, execute) { - const val = await execute(args[0], scope) - if (val.type === 'datetime') { - return val - } - if (val.type !== 'string') { - return NULL_VALUE - } - return DateTime.parseToValue(val.data) +function dateTime({args: [arg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: arg}) + if (typeof base !== 'string') return null + if (isIso8601(base)) return base + return new Date(base).toISOString() } -_global['dateTime'].arity = 1 +dateTime.arity = 1 -_global['defined'] = async function defined(args, scope, execute) { - const inner = await execute(args[0], scope) - return inner.type === 'null' ? 
FALSE_VALUE : TRUE_VALUE +function defined({args: [arg], ...context}: GroqFunctionOptions): boolean { + const base = evaluate({...context, node: arg}) + return base !== null && base !== undefined } -_global['defined'].arity = 1 +defined.arity = 1 -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -_global['identity'] = async function identity(_args, scope) { - return fromString(scope.context.identity) +function identity({identity}: GroqFunctionOptions): string { + return identity } -_global['identity'].arity = 0 - -_global['length'] = async function length(args, scope, execute) { - const inner = await execute(args[0], scope) - - if (inner.type === 'string') { - return fromNumber(countUTF8(inner.data)) - } - - if (inner.isArray()) { - let num = 0 - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of inner) { - num++ - } - return fromNumber(num) - } +identity.arity = 0 - return NULL_VALUE +function length({args: [baseArg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base === 'string') return countUTF8(base) + if (Array.isArray(base)) return base.length + return null } -_global['length'].arity = 1 - -_global['path'] = async function path(args, scope, execute) { - const inner = await execute(args[0], scope) - if (inner.type !== 'string') { - return NULL_VALUE - } +length.arity = 1 - return fromPath(new Path(inner.data)) +function path({args: [baseArg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base !== 'string') return null + return base } -_global['path'].arity = 1 +path.arity = 1 -_global['string'] = async function string(args, scope, execute) { - const value = await execute(args[0], scope) - switch (value.type) { +function string({args: [arg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: arg}) + switch 
(typeof base) { case 'number': case 'string': - case 'boolean': - case 'datetime': - return fromString(`${value.data}`) - default: - return NULL_VALUE - } -} -_global['string'].arity = 1 - -_global['references'] = async function references(args, scope, execute) { - const pathSet = new Set() - for (const arg of args) { - const path = await execute(arg, scope) - if (path.type === 'string') { - pathSet.add(path.data) - } else if (path.isArray()) { - for await (const elem of path) { - if (elem.type === 'string') { - pathSet.add(elem.data) - } - } + case 'boolean': { + return `${base}` + } + default: { + return null } } - - if (pathSet.size === 0) { - return FALSE_VALUE - } - - const scopeValue = await scope.value.get() - return hasReference(scopeValue, pathSet) ? TRUE_VALUE : FALSE_VALUE } -_global['references'].arity = (c) => c >= 1 +string.arity = 1 -_global['round'] = async function round(args, scope, execute) { - const value = await execute(args[0], scope) - if (value.type !== 'number') { - return NULL_VALUE - } +function references({args, ...context}: GroqFunctionOptions): boolean { + const paths = new Set( + args.flatMap((arg) => { + const base = evaluate({...context, node: arg}) + return (Array.isArray(base) ? 
base : [base]).filter((i) => typeof i === 'string') + }), + ) + return hasReference(context.scope.at(-1), paths) +} +references.arity = (c: number) => c >= 1 - const num = value.data - let prec = 0 +function round({args: [baseArg, precisionArg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base !== 'number') return null - if (args.length === 2) { - const precValue = await execute(args[1], scope) - if (precValue.type !== 'number' || precValue.data < 0 || !Number.isInteger(precValue.data)) { - return NULL_VALUE + let precision = 0 + if (precisionArg) { + const p = evaluate({...context, node: precisionArg}) + if (typeof p !== 'number' || p < 0 || !Number.isInteger(p)) { + return null } - prec = precValue.data + precision = p } - if (prec === 0) { - if (num < 0) { - // JavaScript's round() function will always rounds towards positive infinity (-3.5 -> -3). - // The behavior we're interested in is to "round half away from zero". - return fromNumber(-Math.round(-num)) - } - return fromNumber(Math.round(num)) + if (precision === 0) { + // Round half away from zero for negative values + return base < 0 ? -Math.round(-base) : Math.round(base) } - return fromNumber(Number(num.toFixed(prec))) -} -_global['round'].arity = (count) => count >= 1 && count <= 2 -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -_global['now'] = async function now(_args, scope) { - return fromString(scope.context.timestamp.toISOString()) + return Number(base.toFixed(precision)) } -_global['now'].arity = 0 +round.arity = (c: number) => c >= 1 && c <= 2 -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -_global['boost'] = async function boost() { - // This should be handled by the scoring function. 
- throw new Error('unexpected boost call') +function boost(): never { + throw new Error('Unexpected boost call') } +boost.arity = 2 -_global['boost'].arity = 2 - -const string: FunctionSet = {} - -string['lower'] = async function (args, scope, execute) { - const value = await execute(args[0], scope) - - if (value.type !== 'string') { - return NULL_VALUE - } - - return fromString(value.data.toLowerCase()) +function lower({args: [baseArg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base !== 'string') return null + return base.toLowerCase() } -string['lower'].arity = 1 +lower.arity = 1 -string['upper'] = async function (args, scope, execute) { - const value = await execute(args[0], scope) - - if (value.type !== 'string') { - return NULL_VALUE - } - - return fromString(value.data.toUpperCase()) +function upper({args: [baseArg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base !== 'string') return null + return base.toUpperCase() } -string['upper'].arity = 1 +upper.arity = 1 -string['split'] = async function (args, scope, execute) { - const str = await execute(args[0], scope) - if (str.type !== 'string') { - return NULL_VALUE - } - const sep = await execute(args[1], scope) - if (sep.type !== 'string') { - return NULL_VALUE - } +function split({args: [baseArg, separatorArg], ...context}: GroqFunctionOptions): string[] | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base !== 'string') return null + const separator = evaluate({...context, node: separatorArg}) + if (typeof separator !== 'string') return null + if (!base.length) return [] - if (str.data.length === 0) { - return fromJS([]) - } - if (sep.data.length === 0) { - // This uses a Unicode codepoint splitting algorithm - return fromJS(Array.from(str.data)) - } - return fromJS(str.data.split(sep.data)) + if (!separator.length) return 
Array.from(base) + return base.split(separator) } -string['split'].arity = 2 - -_global['lower'] = string['lower'] -_global['upper'] = string['upper'] - -string['startsWith'] = async function (args, scope, execute) { - const str = await execute(args[0], scope) - if (str.type !== 'string') { - return NULL_VALUE - } - - const prefix = await execute(args[1], scope) - if (prefix.type !== 'string') { - return NULL_VALUE - } +split.arity = 2 - return str.data.startsWith(prefix.data) ? TRUE_VALUE : FALSE_VALUE +function startsWith({args: [baseArg, prefixArg], ...context}: GroqFunctionOptions): boolean | null { + const base = evaluate({...context, node: baseArg}) + if (typeof base !== 'string') return null + const prefix = evaluate({...context, node: prefixArg}) + if (typeof prefix !== 'string') return null + return base.startsWith(prefix) } -string['startsWith'].arity = 2 +startsWith.arity = 2 -const array: FunctionSet = {} +function join({args: [baseArg, separatorArg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null + const separator = evaluate({...context, node: separatorArg}) + if (typeof separator !== 'string') return null -array['join'] = async function (args, scope, execute) { - const arr = await execute(args[0], scope) - if (!arr.isArray()) { - return NULL_VALUE - } - const sep = await execute(args[1], scope) - if (sep.type !== 'string') { - return NULL_VALUE - } - let buf = '' - let needSep = false - for await (const elem of arr) { - if (needSep) { - buf += sep.data - } - switch (elem.type) { - case 'number': - case 'string': + const mapped: string[] = [] + for (const item of base) { + switch (typeof item) { case 'boolean': - case 'datetime': - buf += `${elem.data}` + case 'number': + case 'string': { + mapped.push(`${item}`) break - default: - return NULL_VALUE - } - needSep = true - } - return fromJS(buf) -} -array['join'].arity = 2 - -array['compact'] = async 
function (args, scope, execute) { - const arr = await execute(args[0], scope) - if (!arr.isArray()) { - return NULL_VALUE - } - - return new StreamValue(async function* () { - for await (const elem of arr) { - if (elem.type !== 'null') { - yield elem } - } - }) -} -array['compact'].arity = 1 - -array['unique'] = async function (args, scope, execute) { - const value = await execute(args[0], scope) - if (!value.isArray()) { - return NULL_VALUE - } - - return new StreamValue(async function* () { - const added = new Set() - for await (const iter of value) { - switch (iter.type) { - case 'number': - case 'string': - case 'boolean': - case 'datetime': - if (!added.has(iter.data)) { - added.add(iter.data) - yield iter - } - break - default: - yield iter + default: { + // early exit on invalid input + return null } } - }) -} -array['unique'].arity = 1 - -array['intersects'] = async function (args, scope, execute) { - // Intersects returns true if the two arrays have at least one element in common. Only - // primitives are supported; non-primitives are ignored. 
- const arr1 = await execute(args[0], scope) - if (!arr1.isArray()) { - return NULL_VALUE } - const arr2 = await execute(args[1], scope) - if (!arr2.isArray()) { - return NULL_VALUE - } - - for await (const v1 of arr1) { - for await (const v2 of arr2) { - if (isEqual(v1, v2)) { - return TRUE_VALUE - } - } - } - - return FALSE_VALUE + return mapped.join(separator) } -array['intersects'].arity = 2 +join.arity = 2 -const pt: FunctionSet = {} -pt['text'] = async function (args, scope, execute) { - const value = await execute(args[0], scope) - const text = await portableTextContent(value) - - if (text === null) { - return NULL_VALUE - } - - return fromString(text) +function compact({args: [baseArg], ...context}: GroqFunctionOptions): unknown[] | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null + return base.filter((i = null) => i !== null) } +compact.arity = 1 -pt['text'].arity = 1 +function unique({args: [baseArg], ...context}: GroqFunctionOptions): unknown[] | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null -const sanity: FunctionSet = {} -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -sanity['projectId'] = async function (_args, scope) { - if (scope.context.sanity) { - return fromString(scope.context.sanity.projectId) + // `Set`s preserve the order in which those unique values were first inserted + return Array.from( + new Set( + base.map((item) => { + switch (typeof item) { + case 'boolean': + case 'number': + case 'string': { + return `${item}` + } + default: { + return item + } + } + }), + ), + ) +} +unique.arity = 1 + +function intersects({args: [leftArg, rightArg], ...context}: GroqFunctionOptions): boolean | null { + const left = evaluate({...context, node: leftArg}) + if (!Array.isArray(left)) return null + const right = evaluate({...context, node: rightArg}) + if (!Array.isArray(right)) return null + + const createSet = 
(left: unknown[]) => + new Set( + left + .filter( + (i) => + i === undefined || + i === null || + typeof i === 'boolean' || + typeof i === 'number' || + typeof i === 'string', + ) + .map((i = null) => `${i}`), + ) + + const leftSet = createSet(left) + const rightSet = createSet(right) + + if (typeof Set.prototype.isDisjointFrom === 'function') { + return !leftSet.isDisjointFrom(rightSet) + } + + for (const item of leftSet) { + if (rightSet.has(item)) return true } - - return NULL_VALUE + return false } -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -sanity['dataset'] = async function (_args, scope) { - if (scope.context.sanity) { - return fromString(scope.context.sanity.dataset) - } - - return NULL_VALUE -} - -// eslint-disable-next-line require-await -sanity['versionsOf'] = async function (args, scope, execute) { - if (!scope.source.isArray()) return NULL_VALUE - - const value = await execute(args[0], scope) - if (value.type !== 'string') return NULL_VALUE - const baseId = value.data - - // All the document are a version of the given ID if: - // 1. Document ID is of the form bundleId.documentGroupId - // 2. And, they have a field called _version which is an object. - const versionIds: string[] = [] - for await (const value of scope.source) { - if (getType(value) === 'object') { - const val = await value.get() - if ( - val && - '_id' in val && - val._id.split('.').length === 2 && - val._id.endsWith(`.${baseId}`) && - '_version' in val && - typeof val._version === 'object' - ) { - versionIds.push(val._id) - } - } - } - - return fromJS(versionIds) -} -sanity['versionsOf'].arity = 1 - -// eslint-disable-next-line require-await -sanity['partOfRelease'] = async function (args, scope, execute) { - if (!scope.source.isArray()) return NULL_VALUE - - const value = await execute(args[0], scope) - if (value.type !== 'string') return NULL_VALUE - const baseId = value.data - - // A document belongs to a bundle ID if: - // 1. 
Document ID is of the form bundleId.documentGroupId - // 2. And, they have a field called _version which is an object. - const documentIdsInBundle: string[] = [] - for await (const value of scope.source) { - if (getType(value) === 'object') { - const val = await value.get() - if ( - val && - '_id' in val && - val._id.split('.').length === 2 && - val._id.startsWith(`${baseId}.`) && - '_version' in val && - typeof val._version === 'object' - ) { - documentIdsInBundle.push(val._id) +intersects.arity = 2 + +function text({args: [baseArg], ...context}: GroqFunctionOptions): string | null { + const base = evaluate({...context, node: baseArg}) + try { + return toPlainText(base as PortableTextBlock | ArbitraryTypedObject[] | PortableTextBlock[]) + } catch { + return null + } +} +text.arity = 1 + +function projectId({sanity}: GroqFunctionOptions): string | null { + return sanity?.projectId ?? null +} +projectId.arity = 0 + +function dataset({sanity}: GroqFunctionOptions): string | null { + return sanity?.dataset ?? null +} +dataset.arity = 0 + +function versionsOf({args: [baseArg], ...context}: GroqFunctionOptions): string[] | null { + const root = context.scope.at(0) + if (!Array.isArray(root)) return null + + const baseId = evaluate({...context, node: baseArg}) + if (typeof baseId !== 'string') return null + + return root + .filter((value: unknown): value is {_id: string; _version: unknown} => { + // All the document are a version of the given ID if: + if (!value) return false + if (typeof value !== 'object') return false + if (!('_id' in value) || typeof value._id !== 'string') return false + const id = value._id + // 1. Document ID is of the form bundleId.documentGroupId + const idIsVersionOfBaseId = id.split('.').length === 2 && id.endsWith(`.${baseId}`) + if (!idIsVersionOfBaseId) return false + + // 2. And, they have a field called _version which is an object. 
+ return '_version' in value && typeof value._version === 'object' + }, []) + .map((i) => i._id) +} +versionsOf.arity = 1 + +function partOfRelease({args: [baseArg], ...context}: GroqFunctionOptions): string[] | null { + const root = context.scope.at(0) + if (!Array.isArray(root)) return null + + const baseId = evaluate({...context, node: baseArg}) + if (typeof baseId !== 'string') return null + + return root + .filter((value: unknown): value is {_id: string; _version: unknown} => { + // A document belongs to a bundle ID if: + if (!value) return false + if (typeof value !== 'object') return false + if (!('_id' in value) || typeof value._id !== 'string') return false + const id = value._id + + // 1. Document ID is of the form bundleId.documentGroupId + const idIsVersionOfBaseId = id.split('.').length === 2 && id.startsWith(`${baseId}.`) + if (!idIsVersionOfBaseId) return false + + // 2. And, they have a field called _version which is an object. + return '_version' in value && typeof value._version === 'object' + }) + .map((value) => value._id) +} +partOfRelease.arity = 1 + +function order({base, args, ...context}: GroqPipeFunctionOptions): unknown[] | null { + if (!Array.isArray(base)) return null + + return base + .map((value, index) => ({value, index})) + .sort((a, b) => { + for (const ordering of args) { + const direction = ordering.type === 'Desc' ? -1 : 1 + const fieldNode = + ordering.type === 'Asc' || ordering.type === 'Desc' ? 
ordering.base : ordering + + const aResult = evaluate({...context, scope: [...context.scope, a.value], node: fieldNode}) + const bResult = evaluate({...context, scope: [...context.scope, b.value], node: fieldNode}) + + try { + const result = compare(aResult, bResult) + if (result !== 0) return result * direction + } catch { + // if `compare` threw due to type mismatches, we can default to + // sorting by type if they differ + const aTypeRank = getTypeRank(aResult) + const bTypeRank = getTypeRank(bResult) + if (aTypeRank === bTypeRank) continue + return (aTypeRank - bTypeRank) * direction + } } - } - } - - return fromJS(documentIdsInBundle) -} -sanity['partOfRelease'].arity = 1 - -export type GroqPipeFunction = ( - base: Value, - args: ExprNode[], - scope: Scope, - execute: Executor, -) => PromiseLike - -export const pipeFunctions: {[key: string]: WithOptions} = {} - -pipeFunctions['order'] = async function order(base, args, scope, execute) { - // eslint-disable-next-line max-len - // This is a workaround for https://github.com/rpetrich/babel-plugin-transform-async-to-promises/issues/59 - await true - - if (!base.isArray()) { - return NULL_VALUE - } - - const mappers = [] - const directions: string[] = [] - let n = 0 - - for (let mapper of args) { - let direction = 'asc' - - if (mapper.type === 'Desc') { - direction = 'desc' - mapper = mapper.base - } else if (mapper.type === 'Asc') { - mapper = mapper.base - } - - mappers.push(mapper) - directions.push(direction) - n++ - } - const aux = [] - let idx = 0 - - for await (const value of base) { - const newScope = scope.createNested(value) - const tuple = [await value.get(), idx] - for (let i = 0; i < n; i++) { - const result = await execute(mappers[i], newScope) - tuple.push(await result.get()) + return a.index - b.index + }) + .map((i) => i.value) +} +order.arity = (count: number) => count >= 1 + +function score({base, args, ...context}: GroqPipeFunctionOptions): ObjectWithScore[] | null { + if (!Array.isArray(base)) 
return null + + return base + .filter((item: unknown): item is object => typeof item !== 'object' && !!item) + .map((item) => { + const prevScore = '_score' in item && typeof item._score === 'number' ? item._score : 0 + const score = args.reduce((acc, arg) => { + return ( + acc + + evaluateScore({ + ...context, + node: arg, + scope: [...context.scope, item], + }) + ) + }, prevScore) + return Object.assign({}, item, {_score: score}) + }) + .sort((a, b) => a._score - b._score) +} +score.arity = (count: number) => count >= 1 + +function operation({after, before}: GroqFunctionOptions): 'update' | 'create' | 'delete' | null { + if (after && before) return 'update' + if (after) return 'create' + if (before) return 'delete' + return null +} + +function min({args: [baseArg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null + + const nonNullBase = base.filter((item = null) => item !== null) + if (!nonNullBase.length) return null + + let min = Infinity + for (const item of nonNullBase) { + // early exit if a non-null, non-number is found + if (typeof item !== 'number') return null + if (item < min) { + min = item } - aux.push(tuple) - idx++ } - - aux.sort((aTuple, bTuple) => { - for (let i = 0; i < n; i++) { - let c = totalCompare(aTuple[i + 2], bTuple[i + 2]) - if (directions[i] === 'desc') { - c = -c - } - if (c !== 0) { - return c - } - } - // Fallback to sorting on the original index for stable sorting. - return aTuple[1] - bTuple[1] - }) - - return fromJS(aux.map((v) => v[0])) + return min } -pipeFunctions['order'].arity = (count) => count >= 1 - -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -pipeFunctions['score'] = async function score(base, args, scope, execute) { - if (!base.isArray()) return NULL_VALUE +min.arity = 1 - // Anything that isn't an object should be sorted first. 
- const unknown: Array = [] - const scored: Array = [] +function max({args: [baseArg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null - for await (const value of base) { - if (value.type !== 'object') { - unknown.push(await value.get()) - continue - } - - const newScope = scope.createNested(value) - let valueScore = typeof value.data['_score'] === 'number' ? value.data['_score'] : 0 + const items = base.filter((item = null) => item !== null) + if (!items.length) return null - for (const arg of args) { - valueScore += await evaluateScore(arg, newScope, execute) + let max = -Infinity + for (const item of items) { + // early exit if a non-null, non-number is found + if (typeof item !== 'number') return null + if (item > max) { + max = item } - - const newObject = Object.assign({}, value.data, {_score: valueScore}) - scored.push(newObject) - } - - scored.sort((a, b) => b._score - a._score) - return fromJS(scored) -} - -pipeFunctions['score'].arity = (count) => count >= 1 - -type ObjectWithScore = Record & {_score: number} - -const delta: FunctionSet = {} -// eslint-disable-next-line require-await -// eslint-disable-next-line require-await -delta['operation'] = async function (_args, scope) { - const hasBefore = scope.context.before !== null - const hasAfter = scope.context.after !== null - - if (hasBefore && hasAfter) { - return fromString('update') - } - - if (hasAfter) { - return fromString('create') } - - if (hasBefore) { - return fromString('delete') - } - - return NULL_VALUE -} - -delta['changedAny'] = () => { - throw new Error('not implemented') -} -delta['changedAny'].arity = 1 -delta['changedAny'].mode = 'delta' - -delta['changedOnly'] = () => { - throw new Error('not implemented') + return max } -delta['changedOnly'].arity = 1 -delta['changedOnly'].mode = 'delta' +max.arity = 1 -const diff: FunctionSet = {} -diff['changedAny'] = () => { - throw new Error('not 
implemented') -} -diff['changedAny'].arity = 3 +function sum({args: [baseArg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null -diff['changedOnly'] = () => { - throw new Error('not implemented') -} -diff['changedOnly'].arity = 3 + const items = base.filter((item = null) => item !== null) + if (!items.length) return null -const math: FunctionSet = {} -math['min'] = async function (args, scope, execute) { - const arr = await execute(args[0], scope) - if (!arr.isArray()) { - return NULL_VALUE + let sum = 0 + for (const item of items) { + // early exit if a non-null, non-number is found + if (typeof item !== 'number') return null + sum += item } - - let n: number | undefined - for await (const elem of arr) { - if (elem.type === 'null') continue - if (elem.type !== 'number') { - return NULL_VALUE - } - if (n === undefined || elem.data < n) { - n = elem.data - } - } - return fromJS(n) + return sum } -math['min'].arity = 1 +sum.arity = 1 -math['max'] = async function (args, scope, execute) { - const arr = await execute(args[0], scope) - if (!arr.isArray()) { - return NULL_VALUE - } +function avg({args: [baseArg], ...context}: GroqFunctionOptions): number | null { + const base = evaluate({...context, node: baseArg}) + if (!Array.isArray(base)) return null - let n: number | undefined - for await (const elem of arr) { - if (elem.type === 'null') continue - if (elem.type !== 'number') { - return NULL_VALUE - } - if (n === undefined || elem.data > n) { - n = elem.data - } - } - return fromJS(n) -} -math['max'].arity = 1 + const items = base.filter((item = null) => item !== null) + if (!items.length) return null -math['sum'] = async function (args, scope, execute) { - const arr = await execute(args[0], scope) - if (!arr.isArray()) { - return NULL_VALUE + let sum = 0 + for (const item of items) { + // early exit if a non-null, non-number is found + if (typeof item !== 'number') return 
null + sum += item } - let n = 0 - for await (const elem of arr) { - if (elem.type === 'null') continue - if (elem.type !== 'number') { - return NULL_VALUE - } - n += elem.data - } - return fromJS(n) + if (!items.length) return null + return sum / items.length } -math['sum'].arity = 1 +avg.arity = 1 -math['avg'] = async function (args, scope, execute) { - const arr = await execute(args[0], scope) - if (!arr.isArray()) { - return NULL_VALUE - } - - let n = 0 - let c = 0 - for await (const elem of arr) { - if (elem.type === 'null') continue - if (elem.type !== 'number') { - return NULL_VALUE - } - n += elem.data - c++ - } - if (c === 0) { - return NULL_VALUE - } - return fromJS(n / c) +function now({timestamp}: GroqFunctionOptions): string { + return timestamp } -math['avg'].arity = 1 +now.arity = 0 -const dateTime: FunctionSet = {} -dateTime['now'] = async function now(_args, scope) { - return fromDateTime(new DateTime(scope.context.timestamp)) -} -dateTime['now'].arity = 0 +export const pipeFunctions: {[key: string]: WithOptions} = {order, score} export const namespaces: NamespaceSet = { - global: _global, - string, - array, - pt, - delta, - diff, - sanity, - math, - dateTime, + global: { + anywhere, + coalesce, + count, + dateTime, + defined, + identity, + length, + path, + string, + references, + round, + now, + boost, + lower, + upper, + }, + string: {lower, upper, split, startsWith}, + array: {join, compact, unique, intersects}, + pt: {text}, + delta: { + operation, + changedAny: createStub({arity: 1, mode: 'delta'}), + changedOnly: createStub({arity: 1, mode: 'delta'}), + }, + diff: { + changedAny: createStub({arity: 3}), + changedOnly: createStub({arity: 3}), + }, + sanity: {projectId, dataset, versionsOf, partOfRelease}, + math: {min, max, sum, avg}, + dateTime: {now}, } diff --git a/src/evaluator/index.ts b/src/evaluator/index.ts index f864e247..23d123e4 100644 --- a/src/evaluator/index.ts +++ b/src/evaluator/index.ts @@ -1,2 +1 @@ -export 
{tryConstantEvaluate} from './constantEvaluate' -export {evaluateQuery as evaluate} from './evaluate' +export {evaluate, evaluateQuery} from './evaluate' diff --git a/src/evaluator/matching.ts b/src/evaluator/matching.ts index 6942fba7..5c658a81 100644 --- a/src/evaluator/matching.ts +++ b/src/evaluator/matching.ts @@ -1,15 +1,11 @@ -import type {Value} from '../values' - const CHARS = /([^!@#$%^&*(),\\/?";:{}|[\]+<>\s-])+/g const CHARS_WITH_WILDCARD = /([^!@#$%^&(),\\/?";:{}|[\]+<>\s-])+/g const EDGE_CHARS = /(\b\.+|\.+\b)/g const MAX_TERM_LENGTH = 1024 -export type Token = string - -export type Pattern = (tokens: Token[]) => boolean +export type Pattern = (tokens: string[]) => boolean -export function matchText(tokens: Token[], patterns: Pattern[]): boolean { +export function matchText(tokens: string[], patterns: Pattern[]): boolean { if (tokens.length === 0 || patterns.length === 0) { return false } @@ -17,13 +13,13 @@ export function matchText(tokens: Token[], patterns: Pattern[]): boolean { return patterns.every((pattern) => pattern(tokens)) } -export function matchTokenize(text: string): Token[] { +export function matchTokenize(text: string): string[] { return text.replace(EDGE_CHARS, '').match(CHARS) || [] } export function matchAnalyzePattern(text: string): Pattern[] { const termsRe = matchPatternRegex(text) - return termsRe.map((re) => (tokens: Token[]) => tokens.some((token) => re.test(token))) + return termsRe.map((re) => (tokens: string[]) => tokens.some((token) => re.test(token))) } export function matchPatternRegex(text: string): RegExp[] { @@ -32,24 +28,3 @@ export function matchPatternRegex(text: string): RegExp[] { (term) => new RegExp(`^${term.slice(0, MAX_TERM_LENGTH).replace(/\*/g, '.*')}$`, 'i'), ) } - -export async function gatherText(value: Value, cb: (str: string) => void): Promise { - if (value.type === 'string') { - cb(value.data) - return true - } - - if (value.isArray()) { - let success = true - for await (const part of value) { - if 
(part.type === 'string') { - cb(part.data) - } else { - success = false - } - } - return success - } - - return false -} diff --git a/src/evaluator/operators.ts b/src/evaluator/operators.ts index 666d9147..ec67676f 100644 --- a/src/evaluator/operators.ts +++ b/src/evaluator/operators.ts @@ -1,185 +1,184 @@ -import type {OpCall} from '../nodeTypes' -import { - FALSE_VALUE, - fromDateTime, - fromJS, - fromNumber, - fromString, - NULL_VALUE, - StreamValue, - TRUE_VALUE, - type Value, -} from '../values' +/* eslint-disable complexity */ +/* eslint-disable max-statements */ + +import type {OpCallNode} from '../nodeTypes' import {isEqual} from './equality' -import { - gatherText, - matchAnalyzePattern, - matchText, - matchTokenize, - type Pattern, - type Token, -} from './matching' -import {partialCompare} from './ordering' - -type GroqOperatorFn = (left: Value, right: Value) => Value | PromiseLike - -export const operators: {[key in OpCall]: GroqOperatorFn} = { - '==': function eq(left, right) { - return isEqual(left, right) ? TRUE_VALUE : FALSE_VALUE - }, - - '!=': function neq(left, right) { - return isEqual(left, right) ? FALSE_VALUE : TRUE_VALUE - }, - - '>': function gt(left, right) { - if (left.type === 'stream' || right.type === 'stream') return NULL_VALUE - const result = partialCompare(left.data, right.data) - - if (result === null) { - return NULL_VALUE - } - return result > 0 ? 
TRUE_VALUE : FALSE_VALUE - }, +import {evaluate, isIso8601} from './evaluate' +import {matchAnalyzePattern, matchText, matchTokenize} from './matching' +import {compare} from './ordering' +import type {Context} from './types' - '>=': function gte(left, right) { - if (left.type === 'stream' || right.type === 'stream') return NULL_VALUE - const result = partialCompare(left.data, right.data) +interface EvaluateOpCallOptions extends Context { + node: OpCallNode +} - if (result === null) { - return NULL_VALUE +export function evaluateOpCall({node, ...context}: EvaluateOpCallOptions): unknown { + switch (node.op) { + case '==': { + return isEqual( + evaluate({...context, node: node.left}), + evaluate({...context, node: node.right}), + ) + } + + case '!=': { + return !isEqual( + evaluate({...context, node: node.left}), + evaluate({...context, node: node.right}), + ) + } + + case '>': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + try { + return compare(left, right) > 0 + } catch { + return null + } } - return result >= 0 ? TRUE_VALUE : FALSE_VALUE - }, - - '<': function lt(left, right) { - if (left.type === 'stream' || right.type === 'stream') return NULL_VALUE - const result = partialCompare(left.data, right.data) - if (result === null) { - return NULL_VALUE + case '>=': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + try { + return compare(left, right) >= 0 + } catch { + return null + } } - return result < 0 ? TRUE_VALUE : FALSE_VALUE - }, - '<=': function lte(left, right) { - if (left.type === 'stream' || right.type === 'stream') return NULL_VALUE - const result = partialCompare(left.data, right.data) - - if (result === null) { - return NULL_VALUE - } - return result <= 0 ? 
TRUE_VALUE : FALSE_VALUE - }, - - // eslint-disable-next-line func-name-matching - 'in': async function inop(left, right) { - if (right.type === 'path') { - if (left.type !== 'string') { - return NULL_VALUE + case '<': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + try { + return compare(left, right) < 0 + } catch { + return null } - - return right.data.matches(left.data) ? TRUE_VALUE : FALSE_VALUE } - if (right.isArray()) { - for await (const b of right) { - if (isEqual(left, b)) { - return TRUE_VALUE - } + case '<=': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + try { + return compare(left, right) <= 0 + } catch { + return null } - - return FALSE_VALUE } - return NULL_VALUE - }, - - 'match': async function match(left, right) { - let tokens: Token[] = [] - let patterns: Pattern[] = [] + case 'in': { + const left = evaluate({...context, node: node.left}) + + // for `path` functions we don't evaluate it because evaluating a `path` + // function in all other scenarios returns the value inside the `path` + // function if it's a string (null otherwise). 
we check the node before + // evaluating to ensure that what we're checking is within a path function + if ( + node.right.type === 'FuncCall' && + node.right.name === 'path' && + node.right.namespace === 'global' + ) { + if (typeof left !== 'string') return null + const pattern = evaluate({...context, node: node.right.args[0]}) + + if (typeof pattern !== 'string') return null + return new RegExp( + `^${pattern + .split('.') + .map((part) => { + if (part === '*') return '[^.]+' + if (part === '**') return '.*' + return part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + }) + .join('.')}$`, + ).test(left) + } - await gatherText(left, (part) => { - tokens = tokens.concat(matchTokenize(part)) - }) + const right = evaluate({...context, node: node.right}) - const didSucceed = await gatherText(right, (part) => { - patterns = patterns.concat(matchAnalyzePattern(part)) - }) - if (!didSucceed) { - return FALSE_VALUE + if (!Array.isArray(right)) return null + return right.some((item) => isEqual(left, item)) } - const matched = matchText(tokens, patterns) + case 'match': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) - return matched ? TRUE_VALUE : FALSE_VALUE - }, + const tokens = (Array.isArray(left) ? left : [left]) + .filter((i) => typeof i === 'string') + .flatMap(matchTokenize) + const patterns = (Array.isArray(right) ? 
right : [right]) + .filter((i) => typeof i === 'string') + .flatMap(matchAnalyzePattern) - '+': function plus(left, right) { - if (left.type === 'datetime' && right.type === 'number') { - return fromDateTime(left.data.add(right.data)) + if (!patterns.length) return false + return matchText(tokens, patterns) } - if (left.type === 'number' && right.type === 'number') { - return fromNumber(left.data + right.data) - } + case '+': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) - if (left.type === 'string' && right.type === 'string') { - return fromString(left.data + right.data) + if (isIso8601(left) && typeof right === 'number') { + return new Date(new Date(left).getTime() + right * 1000).toISOString() + } + if (typeof left === 'number' && typeof right === 'number') return left + right + if (typeof left === 'string' && typeof right === 'string') return `${left}${right}` + if (Array.isArray(left) && Array.isArray(right)) return [...left, ...right] + if (typeof left === 'object' && left && typeof right === 'object' && right) { + return {...left, ...right} + } + return null } - if (left.type === 'object' && right.type === 'object') { - return fromJS({...left.data, ...right.data}) - } + case '-': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) - if (left.type === 'array' && right.type === 'array') { - return fromJS(left.data.concat(right.data)) - } + if (isIso8601(left) && typeof right === 'number') { + return new Date(new Date(left).getTime() - right * 1000).toISOString() + } - if (left.isArray() && right.isArray()) { - return new StreamValue(async function* () { - for await (const val of left) { - yield val - } + if (isIso8601(left) && isIso8601(right)) { + return (new Date(left).getTime() - new Date(right).getTime()) / 1000 + } - for await (const val of right) { - yield val - } - }) + if (typeof left === 'number' && typeof right === 
'number') return left - right + return null } - return NULL_VALUE - }, - - '-': function minus(left, right) { - if (left.type === 'datetime' && right.type === 'number') { - return fromDateTime(left.data.add(-right.data)) + case '*': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + if (typeof left !== 'number' || typeof right !== 'number') return null + return left * right } - if (left.type === 'datetime' && right.type === 'datetime') { - return fromNumber(left.data.difference(right.data)) + case '/': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + if (typeof left !== 'number' || typeof right !== 'number') return null + return left / right } - if (left.type === 'number' && right.type === 'number') { - return fromNumber(left.data - right.data) + case '%': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + if (typeof left !== 'number' || typeof right !== 'number') return null + return left % right } - return NULL_VALUE - }, - - '*': numericOperator((a, b) => a * b), - '/': numericOperator((a, b) => a / b), - '%': numericOperator((a, b) => a % b), - '**': numericOperator((a, b) => Math.pow(a, b)), -} - -function numericOperator(impl: (a: number, b: number) => number): GroqOperatorFn { - return function (left, right) { - if (left.type === 'number' && right.type === 'number') { - const result = impl(left.data, right.data) - return fromNumber(result) + case '**': { + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + if (typeof left !== 'number' || typeof right !== 'number') return null + return left ** right } - return NULL_VALUE + default: { + throw new Error(`Unknown operator: ${node.op}`) + } } } diff --git a/src/evaluator/ordering.ts b/src/evaluator/ordering.ts index a35c31b7..7c80f020 100644 --- 
a/src/evaluator/ordering.ts +++ b/src/evaluator/ordering.ts @@ -1,51 +1,45 @@ -import {getType, type GroqType} from '../values' - -const TYPE_ORDER: {[key in GroqType]?: number} = { - datetime: 1, - number: 2, - string: 3, - boolean: 4, +import {isIso8601} from './evaluate' + +export function getTypeRank(value: unknown): number { + if (isIso8601(value)) return 1 + if (typeof value === 'number') return 2 + if (typeof value === 'string') return 3 + if (typeof value === 'boolean') return 4 + return 100 } -// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types -export function partialCompare(a: any, b: any): null | number { - const aType = getType(a) - const bType = getType(b) - - if (aType !== bType) { - return null +export function compare(a: unknown, b: unknown): number { + // Check if both values have the same type. + if (typeof a !== typeof b) { + throw new Error('Cannot compare values of different types') } - switch (aType) { - case 'number': - case 'boolean': - return a - b - case 'string': - if (a < b) return -1 - if (a > b) return 1 - return 0 - case 'datetime': - return a.compareTo(b) - default: - return null + // For numbers and booleans. + if (typeof a === 'number' || typeof a === 'boolean') { + if (a < (b as number | boolean)) return -1 + if (a > (b as number | boolean)) return 1 + return 0 } -} -// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types -export function totalCompare(a: any, b: any): number { - const aType = getType(a) - const bType = getType(b) + // For strings. + if (typeof a === 'string' && typeof b === 'string') { + // If both strings are ISO 8601 datetime strings, compare as dates. + if (isIso8601(a) && isIso8601(b)) { + const dateA = new Date(a) + const dateB = new Date(b) - const aTypeOrder = TYPE_ORDER[aType] || 100 - const bTypeOrder = TYPE_ORDER[bType] || 100 + // Use numeric comparison on the epoch times. 
+ if (dateA.getTime() < dateB.getTime()) return -1 + if (dateA.getTime() > dateB.getTime()) return 1 + return 0 + } - if (aTypeOrder !== bTypeOrder) { - return aTypeOrder - bTypeOrder + // Otherwise, compare as ordinary strings. + if (a < b) return -1 + if (a > b) return 1 + return 0 } - let result = partialCompare(a, b) - if (result === null) { - result = 0 - } - return result + // For unsupported types. + throw new Error('Unsupported type: only numbers, booleans, and strings are supported') } diff --git a/src/evaluator/pt.ts b/src/evaluator/pt.ts deleted file mode 100644 index a28a3d87..00000000 --- a/src/evaluator/pt.ts +++ /dev/null @@ -1,47 +0,0 @@ -import type {Value} from '../values' - -export async function portableTextContent(value: Value): Promise { - if (value.type === 'object') { - return blockText(value.data) - } else if (value.isArray()) { - const texts = await arrayText(value) - if (texts.length > 0) { - return texts.join('\n\n') - } - } - - return null -} - -async function arrayText(value: Value, result: string[] = []): Promise { - for await (const block of value) { - if (block.type === 'object') { - const text = blockText(block.data) - if (text !== null) result.push(text) - } else if (block.isArray()) { - await arrayText(block, result) - } - } - - return result -} - -function blockText(obj: Record): string | null { - if (typeof obj['_type'] !== 'string') return null - const children = obj['children'] - if (!Array.isArray(children)) return null - - let result = '' - for (const child of children) { - if ( - child && - typeof child === 'object' && - typeof child._type === 'string' && - child._type === 'span' && - typeof child.text === 'string' - ) { - result += child.text - } - } - return result -} diff --git a/src/evaluator/scope.ts b/src/evaluator/scope.ts deleted file mode 100644 index 65dbfc75..00000000 --- a/src/evaluator/scope.ts +++ /dev/null @@ -1,39 +0,0 @@ -import type {Value} from '../values' -import type {Context} from './types' - -export 
class Scope { - public params: Record - public source: Value - public value: Value - public parent: Scope | null - public context: Context - public isHidden = false - - // eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types - constructor( - params: Record, - source: Value, - value: Value, - context: Context, - parent: Scope | null, - ) { - this.params = params - this.source = source - this.value = value - this.context = context - this.parent = parent - } - - createNested(value: Value): Scope { - if (this.isHidden) { - return new Scope(this.params, this.source, value, this.context, this.parent) - } - return new Scope(this.params, this.source, value, this.context, this) - } - - createHidden(value: Value): Scope { - const result = this.createNested(value) - result.isHidden = true - return result - } -} diff --git a/src/evaluator/scoring.ts b/src/evaluator/scoring.ts index 0a108e65..8f012f5e 100644 --- a/src/evaluator/scoring.ts +++ b/src/evaluator/scoring.ts @@ -1,83 +1,55 @@ import type {ExprNode} from '../nodeTypes' -import {gatherText, matchPatternRegex, matchTokenize, type Token} from './matching' -import {Scope} from './scope' -import type {Executor} from './types' +import {evaluate} from './evaluate' +import {matchPatternRegex, matchTokenize} from './matching' +import type {Context} from './types' // BM25 similarity constants const BM25k = 1.2 -export async function evaluateScore( - node: ExprNode, - scope: Scope, - execute: Executor, -): Promise { +interface EvaluateScoreOptions extends Context { + node: ExprNode +} + +export function evaluateScore({node, ...context}: EvaluateScoreOptions): number { if (node.type === 'OpCall' && node.op === 'match') { - return evaluateMatchScore(node.left, node.right, scope, execute) + const left = evaluate({...context, node: node.left}) + const right = evaluate({...context, node: node.right}) + const leftStrings = (Array.isArray(left) ? 
left : [left]).filter((i) => typeof i === 'string') + const rightStrings = (Array.isArray(right) ? right : [right]).filter( + (i) => typeof i === 'string', + ) + if (!rightStrings.length) return 0 // no string patterns: nothing to score against + + const tokens = leftStrings.flatMap(matchTokenize) + const terms = rightStrings.flatMap(matchPatternRegex) + if (!tokens.length || !terms.length) return 0 + + return terms.reduce((score, re) => { + const freq = tokens.reduce((c, token) => c + (re.test(token) ? 1 : 0), 0) + return score + (freq * (BM25k + 1)) / (freq + BM25k) + }, 0) } if (node.type === 'FuncCall' && node.name === 'boost') { - const innerScore = await evaluateScore(node.args[0], scope, execute) - const boost = await execute(node.args[1], scope) - if (boost.type === 'number' && innerScore > 0) { - return innerScore + boost.data - } - + const [baseArg, boostArg] = node.args + const score = evaluateScore({...context, node: baseArg}) + const boost = evaluate({...context, node: boostArg}) + if (typeof boost === 'number' && score > 0) return score + boost return 0 } - switch (node.type) { - case 'Or': { - const leftScore = await evaluateScore(node.left, scope, execute) - const rightScore = await evaluateScore(node.right, scope, execute) - return leftScore + rightScore - } - case 'And': { - const leftScore = await evaluateScore(node.left, scope, execute) - const rightScore = await evaluateScore(node.right, scope, execute) - if (leftScore === 0 || rightScore === 0) return 0 - return leftScore + rightScore - } - default: { - const res = await execute(node, scope) - return res.type === 'boolean' && res.data === true ?
1 : 0 - } + if (node.type === 'Or') { + const leftScore = evaluateScore({...context, node: node.left}) + const rightScore = evaluateScore({...context, node: node.right}) + return leftScore + rightScore } -} - -async function evaluateMatchScore( - left: ExprNode, - right: ExprNode, - scope: Scope, - execute: Executor, -): Promise { - const text = await execute(left, scope) - const pattern = await execute(right, scope) - - let tokens: Token[] = [] - let terms: RegExp[] = [] - - await gatherText(text, (part) => { - tokens = tokens.concat(matchTokenize(part)) - }) - - const didSucceed = await gatherText(pattern, (part) => { - terms = terms.concat(matchPatternRegex(part)) - }) - - if (!didSucceed) { - return 0 - } - - if (tokens.length === 0 || terms.length === 0) { - return 0 - } - - let score = 0 - for (const re of terms) { - const freq = tokens.reduce((c, token) => c + (re.test(token) ? 1 : 0), 0) - score += (freq * (BM25k + 1)) / (freq + BM25k) + if (node.type === 'And') { + const leftScore = evaluateScore({...context, node: node.left}) + const rightScore = evaluateScore({...context, node: node.right}) + if (leftScore === 0 || rightScore === 0) return 0 + return leftScore + rightScore } - return score + return evaluate({...context, node}) === true ? 1 : 0 } diff --git a/src/evaluator/types.ts b/src/evaluator/types.ts index 134c0abe..75a22293 100644 --- a/src/evaluator/types.ts +++ b/src/evaluator/types.ts @@ -1,8 +1,3 @@ -import type {ExprNode} from '../nodeTypes' -import type {Value} from '../values' -import {Scope} from './scope' - -export type Executor = (node: N, scope: Scope) => Value | PromiseLike export type Document = { _id?: string _type?: string @@ -10,46 +5,43 @@ export type Document = { } export type DereferenceFunction = (obj: {_ref: string}) => PromiseLike -export interface EvaluateOptions { - // The value that will be available as `@` in GROQ. - root?: any - - // The value that will be available as `*` in GROQ. 
- dataset?: any - - // Parameters availble in the GROQ query (using `$param` syntax). - params?: Record - - // The timestamp returned from now() - timestamp?: Date - - // Value used for identity() - identity?: string - - // The value returned from before() in Delta-mode - before?: any - - // The value returned from after() in Delta-mode - after?: any - - // Settings used for the `sanity`-functions - sanity?: { - projectId: string - dataset: string - } - - // Custom function to resolve document references - dereference?: DereferenceFunction +export interface EvaluateQueryOptions + extends Partial< + Pick + > { + dataset?: unknown } export interface Context { - timestamp: Date + /** + * User identity, the value of `identity()` + */ identity: string - before: Value | null - after: Value | null + /** + * Scopes used for this evaluation + */ + scope: unknown[] + /** + * The timestamp returned from `now()` + */ + timestamp: string + /** + * The value returned from before() in Delta-mode + */ + before?: unknown + /** + * The value returned from after() in Delta-mode + */ + after?: unknown + /** + * Parameters available in the GROQ query (using `$param` syntax). 
+ */ + params?: Record + /** + * Settings used for the `sanity`-functions + */ sanity?: { projectId: string dataset: string } - dereference?: DereferenceFunction } diff --git a/src/parser.ts b/src/parser.ts index 54fb00b2..9d509a52 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,5 @@ /* eslint-disable camelcase */ -import {tryConstantEvaluate} from './evaluator' +import {evaluate} from './evaluator' import {type GroqFunctionArity, namespaces, pipeFunctions} from './evaluator/functions' import {type Mark, MarkProcessor, type MarkVisitor} from './markProcessor' import type { @@ -593,15 +593,21 @@ const TRAVERSE_BUILDER: MarkVisitor<(rhs: TraversalResult | null) => TraversalRe square_bracket(p) { const expr = p.process(EXPR_BUILDER) - const value = tryConstantEvaluate(expr) - if (value && value.type === 'number') { + const value = evaluate({ + node: expr, + identity: 'me', + timestamp: new Date().toISOString(), + scope: [], + }) + + if (typeof value === 'number') { return (right) => - traverseElement((base) => ({type: 'AccessElement', base, index: value.data}), right) + traverseElement((base) => ({type: 'AccessElement', base, index: value}), right) } - if (value && value.type === 'string') { + if (typeof value === 'string') { return (right) => - traversePlain((base) => ({type: 'AccessAttribute', base, name: value.data}), right) + traversePlain((base) => ({type: 'AccessAttribute', base, name: value}), right) } return (right) => @@ -619,13 +625,20 @@ const TRAVERSE_BUILDER: MarkVisitor<(rhs: TraversalResult | null) => TraversalRe const isInclusive = p.getMark().name === 'inc_range' p.shift() - const left = p.process(EXPR_BUILDER) - const right = p.process(EXPR_BUILDER) - - const leftValue = tryConstantEvaluate(left) - const rightValue = tryConstantEvaluate(right) - - if (!leftValue || !rightValue || leftValue.type !== 'number' || rightValue.type !== 'number') { + const left = evaluate({ + node: p.process(EXPR_BUILDER), + identity: 'me', + scope: [], + 
timestamp: new Date().toISOString(), + }) + const right = evaluate({ + node: p.process(EXPR_BUILDER), + identity: 'me', + scope: [], + timestamp: new Date().toISOString(), + }) + + if (typeof left !== 'number' || typeof right !== 'number') { throw new GroqQueryError('slicing must use constant numbers') } @@ -634,8 +647,8 @@ const TRAVERSE_BUILDER: MarkVisitor<(rhs: TraversalResult | null) => TraversalRe (base) => ({ type: 'Slice', base, - left: leftValue.data, - right: rightValue.data, + left, + right, isInclusive, }), rhs, diff --git a/src/typeEvaluator/index.ts b/src/typeEvaluator/index.ts index b1cdda3c..df2142bc 100644 --- a/src/typeEvaluator/index.ts +++ b/src/typeEvaluator/index.ts @@ -1,23 +1,6 @@ export type {GroqFunction, GroqFunctionArg, GroqPipeFunction} from '../evaluator/functions' -export type {Scope} from '../evaluator/scope' -export type {Context, DereferenceFunction, Document, Executor} from '../evaluator/types' +export type {Context, DereferenceFunction, Document} from '../evaluator/types' export * from '../nodeTypes' -export type { - AnyStaticValue, - ArrayValue, - BooleanValue, - DateTimeValue, - GroqType, - NullValue, - NumberValue, - ObjectValue, - PathValue, - StaticValue, - StreamValue, - StringValue, - Value, -} from '../values' -export {DateTime, Path} from '../values' export {typeEvaluate} from './typeEvaluate' export {createReferenceTypeNode} from './typeHelpers' export type { diff --git a/src/typeEvaluator/matching.ts b/src/typeEvaluator/matching.ts index 92a185b0..5186ef60 100644 --- a/src/typeEvaluator/matching.ts +++ b/src/typeEvaluator/matching.ts @@ -1,14 +1,8 @@ -import { - matchAnalyzePattern, - matchText, - matchTokenize, - type Pattern, - type Token, -} from '../evaluator/matching' +import {matchAnalyzePattern, matchText, matchTokenize, type Pattern} from '../evaluator/matching' import type {ConcreteTypeNode} from './typeHelpers' export function match(left: ConcreteTypeNode, right: ConcreteTypeNode): boolean | undefined { - let 
tokens: Token[] = [] + let tokens: string[] = [] let patterns: Pattern[] = [] if (left.type === 'string') { if (left.value === undefined) { diff --git a/src/values/Path.ts b/src/values/Path.ts deleted file mode 100644 index 4aef8e48..00000000 --- a/src/values/Path.ts +++ /dev/null @@ -1,36 +0,0 @@ -function escapeRegExp(string: string) { - return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') -} - -function pathRegExp(pattern: string) { - const re = [] - for (const part of pattern.split('.')) { - if (part === '*') { - re.push('[^.]+') - } else if (part === '**') { - re.push('.*') - } else { - re.push(escapeRegExp(part)) - } - } - - return new RegExp(`^${re.join('.')}$`) -} - -export class Path { - private pattern: string - private patternRe: RegExp - - constructor(pattern: string) { - this.pattern = pattern - this.patternRe = pathRegExp(pattern) - } - - matches(str: string): boolean { - return this.patternRe.test(str) - } - - toJSON(): string { - return this.pattern - } -} diff --git a/src/values/StreamValue.ts b/src/values/StreamValue.ts deleted file mode 100644 index 1c1ee2df..00000000 --- a/src/values/StreamValue.ts +++ /dev/null @@ -1,76 +0,0 @@ -import type {Value} from './types' - -export class StreamValue { - type: 'stream' = 'stream' - private generator: () => AsyncGenerator - private ticker: Promise | null - private isDone: boolean - private data: Value[] - - constructor(generator: () => AsyncGenerator) { - this.generator = generator - this.ticker = null - this.isDone = false - this.data = [] - } - - // eslint-disable-next-line class-methods-use-this - isArray(): boolean { - return true - } - - async get(): Promise { - const result = [] - for await (const value of this) { - result.push(await value.get()) - } - return result - } - - async *[Symbol.asyncIterator](): AsyncGenerator { - let i = 0 - while (true) { - for (; i < this.data.length; i++) { - yield this.data[i] - } - - if (this.isDone) { - return - } - - await this._nextTick() - } - } - - _nextTick(): 
Promise { - if (this.ticker) { - return this.ticker - } - - let currentResolver: (value?: void | PromiseLike | undefined) => void - const setupTicker = () => { - this.ticker = new Promise((resolve) => { - currentResolver = resolve - }) - } - - const tick = () => { - currentResolver() - setupTicker() - } - - const fetch = async () => { - for await (const value of this.generator()) { - this.data.push(value) - tick() - } - - this.isDone = true - tick() - } - - setupTicker() - fetch() - return this.ticker! - } -} diff --git a/src/values/dateHelpers.ts b/src/values/dateHelpers.ts deleted file mode 100644 index 9cf9a975..00000000 --- a/src/values/dateHelpers.ts +++ /dev/null @@ -1,37 +0,0 @@ -const RFC3339_REGEX = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|([-+]\d{2}:\d{2}))$/ - -export function parseRFC3339(str: string): Date | null { - if (RFC3339_REGEX.test(str)) { - return new Date(str) - } - return null -} - -export function formatRFC3339(d: Date): string { - const year = addLeadingZero(d.getUTCFullYear(), 4) - const month = addLeadingZero(d.getUTCMonth() + 1, 2) - const day = addLeadingZero(d.getUTCDate(), 2) - const hour = addLeadingZero(d.getUTCHours(), 2) - const minute = addLeadingZero(d.getUTCMinutes(), 2) - const second = addLeadingZero(d.getUTCSeconds(), 2) - - let fractionalSecond = '' - const millis = d.getMilliseconds() - if (millis != 0) { - fractionalSecond = `.${addLeadingZero(millis, 3)}` - } - - return `${year}-${month}-${day}T${hour}:${minute}:${second}${fractionalSecond}Z` -} - -type Stringer = { - toString(): string -} - -function addLeadingZero(num: Stringer, targetLength: number) { - let str = num.toString() - while (str.length < targetLength) { - str = `0${str}` - } - return str -} diff --git a/src/values/index.ts b/src/values/index.ts deleted file mode 100644 index 7025bf7b..00000000 --- a/src/values/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export * from './Path' -export * from './StreamValue' -export * from './types' -export * from 
'./utils' diff --git a/src/values/types.ts b/src/values/types.ts deleted file mode 100644 index 7404566f..00000000 --- a/src/values/types.ts +++ /dev/null @@ -1,40 +0,0 @@ -import type {Path} from './Path' -import type {StreamValue} from './StreamValue' -import type {DateTime, StaticValue} from './utils' - -/** - * A type of a value in GROQ. - */ -export type GroqType = - | 'null' - | 'boolean' - | 'number' - | 'string' - | 'array' - | 'object' - | 'path' - | 'datetime' - -/** - * The result of an expression. - */ -export type Value = AnyStaticValue | StreamValue - -export type StringValue = StaticValue -export type NumberValue = StaticValue -export type NullValue = StaticValue -export type BooleanValue = StaticValue -export type DateTimeValue = StaticValue -export type PathValue = StaticValue -export type ObjectValue = StaticValue, 'object'> -export type ArrayValue = StaticValue - -export type AnyStaticValue = - | StringValue - | NumberValue - | NullValue - | BooleanValue - | DateTimeValue - | ObjectValue - | ArrayValue - | PathValue diff --git a/src/values/utils.ts b/src/values/utils.ts deleted file mode 100644 index b9c5a173..00000000 --- a/src/values/utils.ts +++ /dev/null @@ -1,137 +0,0 @@ -import {formatRFC3339, parseRFC3339} from './dateHelpers' -import {Path} from './Path' -import {StreamValue} from './StreamValue' -import type {BooleanValue, GroqType, NullValue, Value} from './types' - -export class StaticValue { - data: P - type: T - - constructor(data: P, type: T) { - this.data = data - this.type = type - } - - isArray(): boolean { - return this.type === 'array' - } - - // eslint-disable-next-line require-await - async get(): Promise { - return this.data - } - - [Symbol.asyncIterator](): Generator { - if (Array.isArray(this.data)) { - return (function* (data) { - for (const element of data) { - yield fromJS(element) - } - })(this.data) - } - throw new Error(`Cannot iterate over: ${this.type}`) - } -} - -export const NULL_VALUE: NullValue = new 
StaticValue(null, 'null') -export const TRUE_VALUE: BooleanValue = new StaticValue(true, 'boolean') -export const FALSE_VALUE: BooleanValue = new StaticValue(false, 'boolean') - -export class DateTime { - date: Date - - constructor(date: Date) { - this.date = date - } - - static parseToValue(str: string): Value { - const date = parseRFC3339(str) - if (date) { - return new StaticValue(new DateTime(date), 'datetime') - } - return NULL_VALUE - } - - equals(other: DateTime): boolean { - return this.date.getTime() == other.date.getTime() - } - - add(secs: number): DateTime { - const copy = new Date(this.date.getTime()) - copy.setTime(copy.getTime() + secs * 1000) - return new DateTime(copy) - } - - difference(other: DateTime): number { - return (this.date.getTime() - other.date.getTime()) / 1000 - } - - compareTo(other: DateTime): number { - return this.date.getTime() - other.date.getTime() - } - - toString(): string { - return formatRFC3339(this.date) - } - - toJSON(): string { - return this.toString() - } -} - -export function fromNumber(num: number): Value { - if (Number.isFinite(num)) { - return new StaticValue(num, 'number') - } - return NULL_VALUE -} - -export function fromString(str: string): Value { - return new StaticValue(str, 'string') -} - -export function fromDateTime(dt: DateTime): Value { - return new StaticValue(dt, 'datetime') -} - -export function fromPath(path: Path): Value { - return new StaticValue(path, 'path') -} - -function isIterator(obj?: Iterator) { - return obj && typeof obj.next === 'function' -} - -// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types -export function fromJS(val: any): Value { - if (isIterator(val)) { - return new StreamValue(async function* () { - for await (const value of val) { - yield fromJS(value) - } - }) - } else if (val === null || val === undefined) { - return NULL_VALUE - } - return new StaticValue(val, getType(val)) as any -} - -/** - * Returns the type of the value. 
- */ -// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types -export function getType(data: any): GroqType { - if (data === null || typeof data === 'undefined') { - return 'null' - } - if (Array.isArray(data)) { - return 'array' - } - if (data instanceof Path) { - return 'path' - } - if (data instanceof DateTime) { - return 'datetime' - } - return typeof data as GroqType -} diff --git a/test/evaluate.test.ts b/test/evaluate.test.ts index 587180ff..9af8f13f 100644 --- a/test/evaluate.test.ts +++ b/test/evaluate.test.ts @@ -14,11 +14,18 @@ t.test('Basic parsing', async (t) => { const query = `*[_type == "product"]{name}` const tree = parse(query) - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, [{name: 'T-shirt'}, {name: 'Pants'}]) }) + t.test('Queries based on static values should execute synchronously', async (t) => { + const document = {_type: 'user', name: 'Bob'} + const filter = '_type=="user"' + const query = `$__document {"result": ${filter}}.result` + const tree = parse(query) + t.same(evaluate(tree, {params: {__document: document}}), true) + }) + t.test('String function', async (t) => { const dataset = [ {_type: 'color', color: 'red', shade: 500, rgb: {r: 255, g: 0, b: 0}}, @@ -26,9 +33,7 @@ t.test('Basic parsing', async (t) => { ] const query = `*[_type == "color"]{ "class": color + "-" + string(shade + 100), "rgb": string(rgb) }` const tree = parse(query) - - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, [ {class: 'red-600', rgb: null}, {class: 'green-600', rgb: null}, @@ -43,9 +48,7 @@ t.test('Basic parsing', async (t) => { ] const query = `*[val in 1..3]._id` const tree = parse(query) - - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, ['a', 'c']) }) @@ -53,73 +56,50 @@ 
t.test('Basic parsing', async (t) => { const dataset = [{_id: 'a', a: true}, {_id: 'b', b: true}, {_id: 'c'}] const query = `*{"a":select(a => 1, b => 2, 3)}.a` const tree = parse(query) - - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, [1, 2, 3]) }) - t.test('Controlling this', async (t) => { - const query = `@` - const tree = parse(query) - - for (const root of [1, [1, 2], {a: 'b'}]) { - const value = await evaluate(tree, {root}) - const data = await value.get() - t.same(data, root) - } - }) + // t.test('Controlling this', async (t) => { + // const query = `@` + // const tree = parse(query) + // for (const root of [1, [1, 2], {a: 'b'}]) { + // const data = evaluate(tree, {root}) + // t.same(data, root) + // } + // }) t.test('Re-using stream', async (t) => { const query = `[[1, 2], [1, 4]] | order(@[0], @[1] desc)` const tree = parse(query) - const value = await evaluate(tree) - const data = await value.get() + const data = evaluate(tree) t.same(data, [ [1, 4], [1, 2], ]) }) - t.test('Async documents', async (t) => { - const dataset = (async function* () { - yield {_id: 'a', name: 'Michael'} - yield {_id: 'b', name: 'George Michael', father: {_ref: 'a'}} - })() - - const query = `*[father->name == "Michael"][0].name` - const tree = parse(query) - const value = await evaluate(tree, {dataset}) - const data = await value.get() - t.same(data, 'George Michael') - }) - t.test('Parameters', async (t) => { const query = `*[name == $name][].name` const dataset = [{name: 'Michael'}, {name: 'George Michael'}] const tree = parse(query) - const value = await evaluate(tree, {dataset, params: {name: 'Michael'}}) - const data = await value.get() + const data = evaluate(tree, {dataset, params: {name: 'Michael'}}) t.same(data, ['Michael']) }) t.test('Non-array documents', async (t) => { const dataset = {data: [{person: {_ref: 'b'}}]} - const query = `(*).data[]{person->}` const tree = 
parse(query) - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, [{person: null}]) }) t.test('Slices', async (t) => { const dataset = ['a', 'b', 'c', 'd', 'e', 'f'] - const query = `*[0...5][0..3]` const tree = parse(query) - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, ['a', 'b', 'c', 'd']) }) @@ -128,7 +108,6 @@ t.test('Basic parsing', async (t) => { {_type: 'book', title: 'A Game of Thrones'}, {_type: 'tv-show', title: 'Game of Thrones'}, ] - const query = `*[] { _type, _type == "book" => { @@ -136,16 +115,14 @@ t.test('Basic parsing', async (t) => { } }` const tree = parse(query) - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, [{_type: 'book', title: 'A Game of Thrones'}, {_type: 'tv-show'}]) }) t.test('Asc', async (t) => { t.test('returns a null value', async (t) => { const tree: ExprNode = {type: 'Asc', base: {type: 'AccessAttribute', name: 'title'}} - const value = await evaluate(tree, {}) - const data = await value.get() + const data = evaluate(tree, {}) t.same(data, null) }) }) @@ -153,8 +130,7 @@ t.test('Basic parsing', async (t) => { t.test('Desc', async (t) => { t.test('returns a null value', async (t) => { const tree: ExprNode = {type: 'Desc', base: {type: 'AccessAttribute', name: 'title'}} - const value = await evaluate(tree, {}) - const data = await value.get() + const data = evaluate(tree, {}) t.same(data, null) }) }) @@ -170,10 +146,9 @@ t.test('Basic parsing', async (t) => { t.test('throw errors when the node type is unknown', async (t) => { const tree: ExprNode = { type: 'Object', - // @ts-ignore (we want an invalid type for testing purposes) + // @ts-expect-error we want an invalid type for testing purposes attributes: [{type: 'AccessAttribute', name: 'b'}], } - throwsWithMessage(t, () => 
evaluate(tree, {}), 'Unknown node type: AccessAttribute') }) }) @@ -182,12 +157,11 @@ t.test('Basic parsing', async (t) => { t.test('throws when an invalid operator function is used', async (t) => { const tree: ExprNode = { type: 'OpCall', - // @ts-ignore (we want an invalid operator for testing purposes) + // @ts-expect-error (we want an invalid operator for testing purposes) op: '^', left: {type: 'AccessAttribute', name: 'a'}, right: {type: 'AccessAttribute', name: 'b'}, } - throwsWithMessage(t, () => evaluate(tree, {}), 'Unknown operator: ^') }) }) @@ -195,12 +169,9 @@ t.test('Basic parsing', async (t) => { t.test('Parent', async (t) => { t.test('returns null when no parent is present', async (t) => { const dataset = [{_type: 'book', title: 'I, Robot'}] - // We intentionally access the higher scope to force the case when the scope's `parent` value is `null` - const tree = await parse('*[]{"parentName": ^.^.name}') - const value = await evaluate(tree, {dataset}) - const data = await value.get() - + const tree = parse('*[]{"parentName": ^.^.name}') + const data = evaluate(tree, {dataset}) t.same(data, [{parentName: null}]) }) }) @@ -208,7 +179,7 @@ t.test('Basic parsing', async (t) => { t.test('Context', async (t) => { t.test('throws when an unknown key is used', async (t) => { const tree: ExprNode = {type: 'Context', key: 'foo'} - throwsWithMessage(t, () => evaluate(tree, {}), 'unknown context key: foo') + throwsWithMessage(t, () => evaluate(tree, {}), 'Unknown context key: foo') }) }) @@ -217,61 +188,55 @@ t.test('Basic parsing', async (t) => { {_id: 'drafts.agot', _type: 'book', title: 'A Game of Thrones'}, {_id: 'agot', _type: 'book', title: 'Game of Thrones'}, ] - const query = `*[_id in path("drafts.**")]{_id}` const tree = parse(query) - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, [{_id: 'drafts.agot'}]) }) t.test('Delta-GROQ', async (t) => { const tree = 
parse(`before().title == after().title`, {mode: 'delta'}) - const value1 = await evaluate(tree, {before: {title: 'A'}, after: {title: 'A'}}) - t.same(await value1.get(), true) - - const value2 = await evaluate(tree, {before: {title: 'A'}, after: {title: 'B'}}) - t.same(await value2.get(), false) + const value1 = evaluate(tree, {before: {title: 'A'}, after: {title: 'A'}}) + t.same(value1, true) + const value2 = evaluate(tree, {before: {title: 'A'}, after: {title: 'B'}}) + t.same(value2, false) }) t.test('delta::operation()', async (t) => { const tree = parse(`delta::operation()`, {mode: 'delta'}) - const value1 = await evaluate(tree, {before: {title: 'A'}, after: {title: 'A'}}) - t.same(await value1.get(), 'update') - - const value2 = await evaluate(tree, {before: {title: 'A'}}) - t.same(await value2.get(), 'delete') - - const value3 = await evaluate(tree, {after: {title: 'A'}}) - t.same(await value3.get(), 'create') - - const value4 = await evaluate(tree, {}) - t.same(await value4.get(), null) + const value1 = evaluate(tree, {before: {title: 'A'}, after: {title: 'A'}}) + t.same(value1, 'update') + const value2 = evaluate(tree, {before: {title: 'A'}}) + t.same(value2, 'delete') + const value3 = evaluate(tree, {after: {title: 'A'}}) + t.same(value3, 'create') + const value4 = evaluate(tree, {}) + t.same(value4, null) }) t.test('Override identity()', async (t) => { const dataset = [{_id: 'yes', user: 'me'}] const query = `{"me":identity(), "nested": *[user == "me"][0]._id}` const tree = parse(query) - const value = await evaluate(tree, {dataset, identity: 'bob'}) - const data = await value.get() + const data = evaluate(tree, {dataset, identity: 'bob'}) t.same(data, {me: 'bob', nested: 'yes'}) }) t.test('Override now()', async (t) => { - const dataset = [{_id: 'yes', time: '2021-05-06T12:14:15Z'}] + const dataset = [{_id: 'yes', time: new Date('2021-05-06T12:14:15Z').toISOString()}] const query = `{"me":now(), "nested": *[dateTime(time) == dateTime(now())][0]._id}` 
const tree = parse(query) - const value = await evaluate(tree, {dataset, timestamp: new Date('2021-05-06T12:14:15Z')}) - const data = await value.get() + const data = evaluate(tree, { + dataset, + timestamp: new Date('2021-05-06T12:14:15Z').toISOString(), + }) t.same(data, {me: '2021-05-06T12:14:15.000Z', nested: 'yes'}) }) t.test('sanity-functions default', async (t) => { const query = `sanity::dataset() + sanity::projectId()` const tree = parse(query) - const value = await evaluate(tree) - const data = await value.get() + const data = evaluate(tree) t.same(data, null) }) @@ -279,8 +244,7 @@ t.test('Basic parsing', async (t) => { t.test('sanity::dataset() and sanity::projectId()', async (t) => { const query = `sanity::dataset() + sanity::projectId()` const tree = parse(query) - const value = await evaluate(tree, {sanity: {dataset: 'abc', projectId: 'def'}}) - const data = await value.get() + const data = evaluate(tree, {sanity: {dataset: 'abc', projectId: 'def'}}) t.same(data, 'abcdef') }) @@ -292,10 +256,8 @@ t.test('Basic parsing', async (t) => { {_id: 'weekend.sale.doc1', _version: {}}, {_id: 'doc2', _version: {}}, ] - const tree = parse('{"versions": sanity::versionsOf("doc1")}') - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, {versions: ['drafts.doc1', 'sale.doc1']}) }) @@ -309,31 +271,9 @@ t.test('Basic parsing', async (t) => { {_id: 'weekend.sale.doc1', _version: {}}, {_id: 'doc2', _version: {}}, ] - const tree = parse('{"documentsInBundle": sanity::partOfRelease("sale")}') - const value = await evaluate(tree, {dataset}) - const data = await value.get() + const data = evaluate(tree, {dataset}) t.same(data, {documentsInBundle: ['sale.doc1', 'sale.doc2']}) }) }) - - t.test('Custom dereference function', async (t) => { - const dataset = [ - {_id: 'a', name: 'Michael'}, - {_id: 'b', name: 'George Michael', father: {_ref: 'a'}}, - ] - const datasetAsMap = new 
Map(dataset.map((data) => [data._id, data])) - - const query = `*[]{ name, "father": father->name }` - const tree = parse(query) - const value = await evaluate(tree, { - dataset, - dereference: ({_ref}) => Promise.resolve(datasetAsMap.get(_ref)), - }) - const data = await value.get() - t.same(data, [ - {name: 'Michael', father: null}, - {name: 'George Michael', father: 'Michael'}, - ]) - }) }) From 6003aa119aac8a2a76767a663a602365ca333328 Mon Sep 17 00:00:00 2001 From: Rico Kahler Date: Thu, 20 Feb 2025 15:01:49 -0600 Subject: [PATCH 2/2] feat: lazy evaluation --- src/evaluator/evaluate.ts | 74 ++++++++++++------ src/evaluator/functions.ts | 155 +++++++++++++++++++------------------ src/evaluator/matching.ts | 14 +--- src/evaluator/operators.ts | 24 ++++-- src/evaluator/scoring.ts | 21 ++--- 5 files changed, 163 insertions(+), 125 deletions(-) diff --git a/src/evaluator/evaluate.ts b/src/evaluator/evaluate.ts index 76ac2067..394c057c 100644 --- a/src/evaluator/evaluate.ts +++ b/src/evaluator/evaluate.ts @@ -9,6 +9,17 @@ interface EvaluateOptions extends Context { node: ExprNode } +export function isIterable(value: unknown): value is Iterable { + if (value === null || value === undefined) { + return false + } + if (typeof value !== 'object' && typeof value !== 'function') { + return false + } + const iteratorMethod = (value as {[Symbol.iterator]?: unknown})[Symbol.iterator] + return typeof iteratorMethod === 'function' +} + const iso8601Regex = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})$/ export const isIso8601 = (str: unknown): str is string => typeof str === 'string' && iso8601Regex.test(str) @@ -75,8 +86,8 @@ export function evaluate({node, ...context}: EvaluateOptions): unknown { case 'Filter': { const base = evaluate({...context, node: node.base}) - if (!Array.isArray(base)) return null - return base.filter((item) => + if (!isIterable(base)) return null + return Iterator.from(base).filter((item) => evaluate({ ...context, node: 
node.expr, @@ -117,24 +128,36 @@ export function evaluate({node, ...context}: EvaluateOptions): unknown { case 'AccessElement': { const base = evaluate({...context, node: node.base}) - if (!Array.isArray(base)) return null - return base.at(node.index) + if (!isIterable(base)) return null + const index = node.index + if (index < 0) return Array.from(base).at(index) + return Iterator.from(base).drop(index).next().value } case 'Slice': { const base = evaluate({...context, node: node.base}) - if (!Array.isArray(base)) return null - return base.slice(node.left, node.isInclusive ? node.right + 1 : node.right) + if (!isIterable(base)) return null + const start = node.left + const end = node.isInclusive ? node.right + 1 : node.right + + // negative slices require buffering the entire iterable into an array + if (start < 0 || end < 0) { + return Array.from(base).slice(start, end) + } + + return Iterator.from(base) + .drop(start) + .take(end - start) } case 'Deref': { const base = evaluate({...context, node: node.base}) const root = context.scope.at(0) - if (!Array.isArray(root)) return null + if (!isIterable(root)) return null if (typeof base !== 'object' || !base) return null if (!('_ref' in base) || typeof base._ref !== 'string') return null - return root.find( + return Iterator.from(root).find( (doc: unknown) => typeof doc === 'object' && !!doc && @@ -156,10 +179,10 @@ export function evaluate({node, ...context}: EvaluateOptions): unknown { return node.attributes.reduce>((acc, attribute) => { switch (attribute.type) { case 'ObjectAttributeValue': { - const value = evaluate({...context, node: attribute.value}) - if (value !== undefined) { - acc[attribute.name] = value - } + Object.defineProperty(acc, attribute.name, { + enumerable: true, + get: () => evaluate({...context, node: attribute.value}), + }) return acc } @@ -194,10 +217,13 @@ export function evaluate({node, ...context}: EvaluateOptions): unknown { } case 'Array': { - return node.elements.flatMap((element) => { + 
return Iterator.from(node.elements).flatMap(function* (element) { const value = evaluate({...context, node: element.value}) - if (element.isSplat) return Array.isArray(value) ? value : [] - return value + if (element.isSplat && isIterable(value)) { + yield* Iterator.from(value) + } else { + yield value + } }) } @@ -251,14 +277,15 @@ export function evaluate({node, ...context}: EvaluateOptions): unknown { case 'ArrayCoerce': { const base = evaluate({...context, node: node.base}) - if (Array.isArray(base)) return base + if (isIterable(base)) return base return null } case 'Map': { const base = evaluate({...context, node: node.base}) - if (!Array.isArray(base)) return null - return base.map((item) => + if (!isIterable(base)) return null + + return Iterator.from(base).map((item) => evaluate({ ...context, node: node.expr, @@ -269,15 +296,18 @@ export function evaluate({node, ...context}: EvaluateOptions): unknown { case 'FlatMap': { const base = evaluate({...context, node: node.base}) - if (!Array.isArray(base)) return null - return base.flatMap((item) => { + if (!isIterable(base)) return null + return Iterator.from(base).flatMap(function* (item) { const child = evaluate({ ...context, node: node.expr, scope: [...context.scope.slice(0, -1), item], }) - if (Array.isArray(child)) return child - return [child] + if (isIterable(child)) { + yield* Iterator.from(child) + } else { + yield child + } }) } diff --git a/src/evaluator/functions.ts b/src/evaluator/functions.ts index 88eaeb84..2da5dbe3 100644 --- a/src/evaluator/functions.ts +++ b/src/evaluator/functions.ts @@ -2,13 +2,13 @@ import {toPlainText} from '@portabletext/toolkit' import type {ArbitraryTypedObject, PortableTextBlock} from '@portabletext/types' import type {ExprNode} from '../nodeTypes' -import {evaluate, isIso8601} from './evaluate' +import {evaluate, isIso8601, isIterable} from './evaluate' import {compare, getTypeRank} from './ordering' import {evaluateScore} from './scoring' import type {Context} from 
'./types' function hasReference(value: unknown, paths: Set): boolean { - if (Array.isArray(value)) { + if (isIterable(value)) { for (const child of value) { if (hasReference(child, paths)) return true } @@ -93,8 +93,8 @@ coalesce.arity = 1 function count({args: [arg], ...context}: GroqFunctionOptions): number | null { const base = evaluate({...context, node: arg}) - if (!Array.isArray(base)) return null - return base.length + if (!isIterable(base)) return null + return Iterator.from(base).reduce((count) => count + 1, 0) } count.arity = 1 @@ -120,7 +120,7 @@ identity.arity = 0 function length({args: [baseArg], ...context}: GroqFunctionOptions): number | null { const base = evaluate({...context, node: baseArg}) if (typeof base === 'string') return countUTF8(base) - if (Array.isArray(base)) return base.length + if (isIterable(base)) return Iterator.from(base).reduce((length) => length + 1, 0) return null } length.arity = 1 @@ -151,7 +151,7 @@ function references({args, ...context}: GroqFunctionOptions): boolean { const paths = new Set( args.flatMap((arg) => { const base = evaluate({...context, node: arg}) - return (Array.isArray(base) ? base : [base]).filter((i) => typeof i === 'string') + return (isIterable(base) ? 
Array.from(base) : [base]).filter((i) => typeof i === 'string') }), ) return hasReference(context.scope.at(-1), paths) @@ -222,7 +222,7 @@ startsWith.arity = 2 function join({args: [baseArg, separatorArg], ...context}: GroqFunctionOptions): string | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null + if (!isIterable(base)) return null const separator = evaluate({...context, node: separatorArg}) if (typeof separator !== 'string') return null @@ -246,21 +246,24 @@ function join({args: [baseArg, separatorArg], ...context}: GroqFunctionOptions): } join.arity = 2 -function compact({args: [baseArg], ...context}: GroqFunctionOptions): unknown[] | null { +function compact({ + args: [baseArg], + ...context +}: GroqFunctionOptions): IteratorObject | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null - return base.filter((i = null) => i !== null) + if (!isIterable(base)) return null + return Iterator.from(base).filter((i = null) => i !== null) } compact.arity = 1 function unique({args: [baseArg], ...context}: GroqFunctionOptions): unknown[] | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null + if (!isIterable(base)) return null // `Set`s preserve the order in which those unique values were first inserted return Array.from( new Set( - base.map((item) => { + Iterator.from(base).map((item) => { switch (typeof item) { case 'boolean': case 'number': @@ -279,13 +282,13 @@ unique.arity = 1 function intersects({args: [leftArg, rightArg], ...context}: GroqFunctionOptions): boolean | null { const left = evaluate({...context, node: leftArg}) - if (!Array.isArray(left)) return null + if (!isIterable(left)) return null const right = evaluate({...context, node: rightArg}) - if (!Array.isArray(right)) return null + if (!isIterable(right)) return null - const createSet = (left: unknown[]) => + const createSet = (iterable: Iterable) => new Set( - left 
+ Iterator.from(iterable) .filter( (i) => i === undefined || @@ -294,26 +297,21 @@ function intersects({args: [leftArg, rightArg], ...context}: GroqFunctionOptions typeof i === 'number' || typeof i === 'string', ) - .map((i = null) => `${i}`), + .map((i) => `${i}`), ) const leftSet = createSet(left) const rightSet = createSet(right) - if (typeof Set.prototype.isDisjointFrom === 'function') { - return !leftSet.isDisjointFrom(rightSet) - } - - for (const item of leftSet) { - if (rightSet.has(item)) return true - } - return false + // TODO: ensure polyfills for this are here + return !leftSet.isDisjointFrom(rightSet) } intersects.arity = 2 function text({args: [baseArg], ...context}: GroqFunctionOptions): string | null { const base = evaluate({...context, node: baseArg}) try { + // TODO: this may not work anymore ... return toPlainText(base as PortableTextBlock | ArbitraryTypedObject[] | PortableTextBlock[]) } catch { return null @@ -331,16 +329,18 @@ function dataset({sanity}: GroqFunctionOptions): string | null { } dataset.arity = 0 -function versionsOf({args: [baseArg], ...context}: GroqFunctionOptions): string[] | null { +function versionsOf({ + args: [baseArg], + ...context +}: GroqFunctionOptions): IteratorObject | null { const root = context.scope.at(0) - if (!Array.isArray(root)) return null + if (!isIterable(root)) return null const baseId = evaluate({...context, node: baseArg}) if (typeof baseId !== 'string') return null - return root + return Iterator.from(root) .filter((value: unknown): value is {_id: string; _version: unknown} => { - // All the document are a version of the given ID if: if (!value) return false if (typeof value !== 'object') return false if (!('_id' in value) || typeof value._id !== 'string') return false @@ -351,19 +351,22 @@ function versionsOf({args: [baseArg], ...context}: GroqFunctionOptions): string[ // 2. And, they have a field called _version which is an object. 
return '_version' in value && typeof value._version === 'object' - }, []) + }) .map((i) => i._id) } versionsOf.arity = 1 -function partOfRelease({args: [baseArg], ...context}: GroqFunctionOptions): string[] | null { +function partOfRelease({ + args: [baseArg], + ...context +}: GroqFunctionOptions): IteratorObject | null { const root = context.scope.at(0) - if (!Array.isArray(root)) return null + if (!isIterable(root)) return null const baseId = evaluate({...context, node: baseArg}) if (typeof baseId !== 'string') return null - return root + return Iterator.from(root) .filter((value: unknown): value is {_id: string; _version: unknown} => { // A document belongs to a bundle ID if: if (!value) return false @@ -383,9 +386,9 @@ function partOfRelease({args: [baseArg], ...context}: GroqFunctionOptions): stri partOfRelease.arity = 1 function order({base, args, ...context}: GroqPipeFunctionOptions): unknown[] | null { - if (!Array.isArray(base)) return null + if (!isIterable(base)) return null - return base + return Array.from(base) .map((value, index) => ({value, index})) .sort((a, b) => { for (const ordering of args) { @@ -416,25 +419,26 @@ function order({base, args, ...context}: GroqPipeFunctionOptions): unknown[] | n order.arity = (count: number) => count >= 1 function score({base, args, ...context}: GroqPipeFunctionOptions): ObjectWithScore[] | null { - if (!Array.isArray(base)) return null + if (!isIterable(base)) return null - return base - .filter((item: unknown): item is object => typeof item !== 'object' && !!item) - .map((item) => { - const prevScore = '_score' in item && typeof item._score === 'number' ? 
item._score : 0 - const score = args.reduce((acc, arg) => { - return ( - acc + - evaluateScore({ - ...context, - node: arg, - scope: [...context.scope, item], - }) - ) - }, prevScore) - return Object.assign({}, item, {_score: score}) - }) - .sort((a, b) => a._score - b._score) + return Array.from( + Iterator.from(base) + .filter((item: unknown): item is object => typeof item !== 'object' && !!item) + .map((item) => { + const prevScore = '_score' in item && typeof item._score === 'number' ? item._score : 0 + const score = args.reduce((acc, arg) => { + return ( + acc + + evaluateScore({ + ...context, + node: arg, + scope: [...context.scope, item], + }) + ) + }, prevScore) + return Object.assign({}, item, {_score: score}) + }), + ).sort((a, b) => a._score - b._score) } score.arity = (count: number) => count >= 1 @@ -447,75 +451,76 @@ function operation({after, before}: GroqFunctionOptions): 'update' | 'create' | function min({args: [baseArg], ...context}: GroqFunctionOptions): number | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null - - const nonNullBase = base.filter((item = null) => item !== null) - if (!nonNullBase.length) return null + if (!isIterable(base)) return null let min = Infinity - for (const item of nonNullBase) { + for (const item of base) { + if (item === null || item === undefined) continue // early exit if a non-null, non-number is found if (typeof item !== 'number') return null if (item < min) { min = item } } + + if (min === Infinity) return null return min } min.arity = 1 function max({args: [baseArg], ...context}: GroqFunctionOptions): number | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null - - const items = base.filter((item = null) => item !== null) - if (!items.length) return null + if (!isIterable(base)) return null let max = -Infinity - for (const item of items) { + for (const item of base) { + if (item === undefined || item === null) 
continue // early exit if a non-null, non-number is found if (typeof item !== 'number') return null if (item > max) { max = item } } + if (max === -Infinity) return null return max } max.arity = 1 function sum({args: [baseArg], ...context}: GroqFunctionOptions): number | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null - - const items = base.filter((item = null) => item !== null) - if (!items.length) return null + if (!isIterable(base)) return null let sum = 0 - for (const item of items) { + let foundNumber = false + for (const item of base) { + if (item === undefined || item === null) continue // early exit if a non-null, non-number is found if (typeof item !== 'number') return null + foundNumber = true sum += item } + + if (!foundNumber) return null return sum } sum.arity = 1 function avg({args: [baseArg], ...context}: GroqFunctionOptions): number | null { const base = evaluate({...context, node: baseArg}) - if (!Array.isArray(base)) return null - - const items = base.filter((item = null) => item !== null) - if (!items.length) return null + if (!isIterable(base)) return null let sum = 0 - for (const item of items) { + let count = 0 + for (const item of base) { + if (item === undefined || item === null) continue // early exit if a non-null, non-number is found if (typeof item !== 'number') return null + count += 1 sum += item } - if (!items.length) return null - return sum / items.length + if (count === 0) return null + return sum / count } avg.arity = 1 diff --git a/src/evaluator/matching.ts b/src/evaluator/matching.ts index 5c658a81..9846e998 100644 --- a/src/evaluator/matching.ts +++ b/src/evaluator/matching.ts @@ -3,15 +3,7 @@ const CHARS_WITH_WILDCARD = /([^!@#$%^&(),\\/?";:{}|[\]+<>\s-])+/g const EDGE_CHARS = /(\b\.+|\.+\b)/g const MAX_TERM_LENGTH = 1024 -export type Pattern = (tokens: string[]) => boolean - -export function matchText(tokens: string[], patterns: Pattern[]): boolean { - if (tokens.length === 0 
|| patterns.length === 0) { - return false - } - - return patterns.every((pattern) => pattern(tokens)) -} +export type Pattern = (tokens: string[] | IteratorObject) => boolean export function matchTokenize(text: string): string[] { return text.replace(EDGE_CHARS, '').match(CHARS) || [] @@ -19,7 +11,9 @@ export function matchTokenize(text: string): string[] { export function matchAnalyzePattern(text: string): Pattern[] { const termsRe = matchPatternRegex(text) - return termsRe.map((re) => (tokens: string[]) => tokens.some((token) => re.test(token))) + return termsRe.map( + (re) => (tokens: string[] | IteratorObject) => tokens.some((token) => re.test(token)), + ) } export function matchPatternRegex(text: string): RegExp[] { diff --git a/src/evaluator/operators.ts b/src/evaluator/operators.ts index ec67676f..6962645e 100644 --- a/src/evaluator/operators.ts +++ b/src/evaluator/operators.ts @@ -3,7 +3,7 @@ import type {OpCallNode} from '../nodeTypes' import {isEqual} from './equality' -import {evaluate, isIso8601} from './evaluate' +import {evaluate, isIso8601, isIterable} from './evaluate' import {matchAnalyzePattern, matchText, matchTokenize} from './matching' import {compare} from './ordering' import type {Context} from './types' @@ -12,6 +12,11 @@ interface EvaluateOpCallOptions extends Context { node: OpCallNode } +function* concat(a: Iterable, b: Iterable): Generator { + for (const item of a) yield item + for (const item of b) yield item +} + export function evaluateOpCall({node, ...context}: EvaluateOpCallOptions): unknown { switch (node.op) { case '==': { @@ -98,23 +103,26 @@ export function evaluateOpCall({node, ...context}: EvaluateOpCallOptions): unkno const right = evaluate({...context, node: node.right}) - if (!Array.isArray(right)) return null - return right.some((item) => isEqual(left, item)) + if (!isIterable(right)) return null + return Iterator.from(right).some((item) => isEqual(left, item)) } case 'match': { const left = evaluate({...context, node: 
node.left}) const right = evaluate({...context, node: node.right}) - const tokens = (Array.isArray(left) ? left : [left]) + const tokens = (isIterable(left) ? Iterator.from(left) : [left].values()) .filter((i) => typeof i === 'string') .flatMap(matchTokenize) - const patterns = (Array.isArray(right) ? right : [right]) + const patterns = (isIterable(right) ? Iterator.from(right) : [right].values()) .filter((i) => typeof i === 'string') .flatMap(matchAnalyzePattern) - if (!patterns.length) return false - return matchText(tokens, patterns) + // if there are no patterns or tokens return false + if (!patterns.some(() => true)) return false + if (!tokens.some(() => true)) return false + + return patterns.every((pattern) => pattern(tokens)) } case '+': { @@ -126,7 +134,7 @@ export function evaluateOpCall({node, ...context}: EvaluateOpCallOptions): unkno } if (typeof left === 'number' && typeof right === 'number') return left + right if (typeof left === 'string' && typeof right === 'string') return `${left}${right}` - if (Array.isArray(left) && Array.isArray(right)) return [...left, ...right] + if (isIterable(left) && isIterable(right)) return concat(left, right) if (typeof left === 'object' && left && typeof right === 'object' && right) { return {...left, ...right} } diff --git a/src/evaluator/scoring.ts b/src/evaluator/scoring.ts index 8f012f5e..95bd6295 100644 --- a/src/evaluator/scoring.ts +++ b/src/evaluator/scoring.ts @@ -1,5 +1,5 @@ import type {ExprNode} from '../nodeTypes' -import {evaluate} from './evaluate' +import {evaluate, isIterable} from './evaluate' import {matchPatternRegex, matchTokenize} from './matching' import type {Context} from './types' @@ -14,15 +14,16 @@ export function evaluateScore({node, ...context}: EvaluateScoreOptions): number if (node.type === 'OpCall' && node.op === 'match') { const left = evaluate({...context, node: node.left}) const right = evaluate({...context, node: node.right}) - const leftStrings = (Array.isArray(left) ? 
left : [left]).filter((i) => typeof i === 'string') - const rightStrings = (Array.isArray(right) ? right : [right]).filter( - (i) => typeof i === 'string', - ) - if (rightStrings.length) return 0 - - const tokens = leftStrings.flatMap(matchTokenize) - const terms = rightStrings.flatMap(matchPatternRegex) - if (!tokens.length || !terms.length) return 0 + const tokens = (isIterable(left) ? Iterator.from(left) : [left].values()) + .filter((i) => typeof i === 'string') + .flatMap(matchTokenize) + const terms = (isIterable(right) ? Iterator.from(right) : [right].values()) + .filter((i) => typeof i === 'string') + .flatMap(matchPatternRegex) + + // if either iterable is empty + if (!tokens.some(() => true)) return 0 + if (!terms.some(() => true)) return 0 return terms.reduce((score, re) => { const freq = tokens.reduce((c, token) => c + (re.test(token) ? 1 : 0), 0)