diff --git a/package-lock.json b/package-lock.json index af3ef8d83..377f2ac70 100644 --- a/package-lock.json +++ b/package-lock.json @@ -21995,6 +21995,12 @@ "vega-lite": "*" } }, + "node_modules/vega-embed/node_modules/yallist": { + "version": "4.0.0", + "extraneous": true, + "inBundle": true, + "license": "ISC" + }, "node_modules/vega-encode": { "version": "4.9.1", "resolved": "https://registry.npmjs.org/vega-encode/-/vega-encode-4.9.1.tgz", @@ -42861,6 +42867,13 @@ "vega-schema-url-parser": "^2.2.0", "vega-themes": "^2.10.0", "vega-tooltip": "^0.27.0" + }, + "dependencies": { + "yallist": { + "version": "4.0.0", + "bundled": true, + "extraneous": true + } } }, "vega-encode": { diff --git a/packages/backend/indices/iex-insights.json b/packages/backend/indices/iex-insights.json index 861ac6625..9b731c59f 100644 --- a/packages/backend/indices/iex-insights.json +++ b/packages/backend/indices/iex-insights.json @@ -130,6 +130,10 @@ "type": "keyword", "ignore_above": 512, "normalizer": "lowercase_normalizer" + }, + "simple": { + "type": "text", + "analyzer": "simple" } } }, @@ -278,10 +282,7 @@ "normalizer": { "lowercase_normalizer": { "type": "custom", - "filter": [ - "lowercase", - "trim" - ] + "filter": ["lowercase", "trim"] } } } diff --git a/packages/backend/schema.gql b/packages/backend/schema.gql index 2dade8bb4..0e54cb23c 100644 --- a/packages/backend/schema.gql +++ b/packages/backend/schema.gql @@ -332,6 +332,7 @@ input InsightSearch { paging: Paging query: String! sort: [Sort!] + useNewSearch: Boolean! 
} type InsightSearchResults { diff --git a/packages/backend/src/lib/elasticsearch.ts b/packages/backend/src/lib/elasticsearch.ts index d3e2c080b..f7a94f996 100644 --- a/packages/backend/src/lib/elasticsearch.ts +++ b/packages/backend/src/lib/elasticsearch.ts @@ -21,7 +21,18 @@ import { GetResponse, MgetResponse, SearchBody, SearchResponse } from '@iex/mode import { IndexedInsight } from '@iex/models/indexed/indexed-insight'; import { ItemType } from '@iex/models/item-type'; import { getLogger } from '@iex/shared/logger'; -import { parseToElasticsearch, SearchMultiTerm, SearchNestedOrFilter, SearchTerm } from '@iex/shared/search'; +import { + parseToElasticsearch as parseToElasticsearchOld, + SearchMultiTerm as SearchMultiTermOld, + SearchNestedOrFilter as SearchNestedOrFilterOld, + SearchTerm as SearchTermOld +} from '@iex/shared/search'; +import { + parseToElasticsearch as parseToElasticsearchNew, + SearchMultiTerm as SearchMultiTermNew, + SearchNestedOrFilter as SearchNestedOrFilterNew, + SearchTerm as SearchTermNew +} from '@iex/shared/search2'; import { detailedDiff } from 'deep-object-diff'; import { DateTime } from 'luxon'; @@ -321,6 +332,10 @@ export async function searchInsights( if (search != null) { // Parse an IEX search into Elasticsearch query + const parseToElasticsearch = search.useNewSearch ? parseToElasticsearchNew : parseToElasticsearchOld; + const SearchMultiTerm = search.useNewSearch ? SearchMultiTermNew : SearchMultiTermOld; + const SearchNestedOrFilter = search.useNewSearch ? SearchNestedOrFilterNew : SearchNestedOrFilterOld; + const SearchTerm = search.useNewSearch ? 
SearchTermNew : SearchTermOld; query.body!.query = parseToElasticsearch(search.query, (clauses) => { // This modifier function runs after parsing but before converting to Elasticsearch diff --git a/packages/backend/src/models/insight-search.ts b/packages/backend/src/models/insight-search.ts index b2533d75f..12872725c 100644 --- a/packages/backend/src/models/insight-search.ts +++ b/packages/backend/src/models/insight-search.ts @@ -38,6 +38,9 @@ export class InsightSearch { @Field() query!: string; + @Field() + useNewSearch!: boolean; + @Field(() => [Sort], { nullable: true }) sort?: Sort[]; diff --git a/packages/frontend/src/pages/search-page/components/search-bar/search-bar.tsx b/packages/frontend/src/pages/search-page/components/search-bar/search-bar.tsx index c130d400c..5c0990991 100644 --- a/packages/frontend/src/pages/search-page/components/search-bar/search-bar.tsx +++ b/packages/frontend/src/pages/search-page/components/search-bar/search-bar.tsx @@ -24,6 +24,7 @@ import { MenuItemOption, MenuList, MenuOptionGroup, + Switch, Tooltip } from '@chakra-ui/react'; import type { ReactElement } from 'react'; @@ -52,12 +53,18 @@ const availableSortFields = [ export const SearchBar = (): ReactElement => { const dispatch = useDispatch(); - - const { query, sort, showFilters, isFiltered, options } = useSelector((state: RootState) => state.search); + const { query, useNewSearch, sort, showFilters, isFiltered, options } = useSelector( + (state: RootState) => state.search + ); const [internalQuery, setInternalQuery] = useState(query); + const previousQueryRef = useRef(query); + const setUseNewSearch = (value: boolean) => { + dispatch(searchSlice.actions.setUseNewSearch(value)); + }; + const toggleShowFilters = () => { dispatch(searchSlice.actions.setShowFilters(!showFilters)); }; @@ -124,6 +131,16 @@ export const SearchBar = (): ReactElement => { canClear={query.length > 0 || sort !== undefined} /> + + setUseNewSearch(event?.target.checked)} + /> + + diff --git 
a/packages/frontend/src/pages/search-page/search-page.tsx b/packages/frontend/src/pages/search-page/search-page.tsx index 605ac360e..5b7beaf81 100644 --- a/packages/frontend/src/pages/search-page/search-page.tsx +++ b/packages/frontend/src/pages/search-page/search-page.tsx @@ -35,7 +35,7 @@ import { SearchBar } from './components/search-bar/search-bar'; export const SearchPage = () => { const initialized = useRef(false); const dispatch = useDispatch(); - const { query, sort, showFilters, options } = useSelector((state: RootState) => state.search); + const { query, useNewSearch, sort, showFilters, options } = useSelector((state: RootState) => state.search); // Load query from params const { query: queryFromUrl } = useParams(); @@ -45,7 +45,8 @@ export const SearchPage = () => { const [{ data, error, fetching, hasMore, total }, fetchMore] = useSearch({ query, sort, - paused: !initialized.current + paused: !initialized.current, + useNewSearch }); const insightResults = data.insights.results.map(({ insight }) => { @@ -74,9 +75,9 @@ export const SearchPage = () => { } } - const url = '/search' + generateSearchUrl(query, sort); + const url = '/search' + generateSearchUrl(query, sort, useNewSearch); navigate(url, { replace: true }); - }, [dispatch, navigate, query, queryFromUrl, searchParams, sort]); + }, [dispatch, navigate, query, useNewSearch, queryFromUrl, searchParams, sort]); useDebounce( () => { diff --git a/packages/frontend/src/shared/search-url.ts b/packages/frontend/src/shared/search-url.ts index e50f6194c..00ac6f50d 100644 --- a/packages/frontend/src/shared/search-url.ts +++ b/packages/frontend/src/shared/search-url.ts @@ -16,7 +16,11 @@ import type { Sort } from '../models/generated/graphql'; -export const generateSearchUrl = (query: string | undefined, sort: Sort | undefined) => { +export const generateSearchUrl = ( + query: string | undefined, + sort: Sort | undefined, + useNewSearch?: boolean | undefined +) => { const path = `/${encodeURIComponent(query || 
'')}`; const searchParams: string[] = []; if (sort != null) { @@ -27,6 +31,9 @@ export const generateSearchUrl = (query: string | undefined, sort: Sort | undefi searchParams.push(`dir=${sort.direction}`); } } + if (useNewSearch === false) { + searchParams.push(`legacySearch=true`); + } return searchParams.length > 0 ? `${path}?${searchParams.join('&')}` : path; }; diff --git a/packages/frontend/src/shared/useSearch.ts b/packages/frontend/src/shared/useSearch.ts index b7f7aaabe..240bf648a 100644 --- a/packages/frontend/src/shared/useSearch.ts +++ b/packages/frontend/src/shared/useSearch.ts @@ -81,6 +81,7 @@ const INSIGHTS_QUERY = gql` export interface UseSearchProps { query?: string; + useNewSearch?: boolean; sort?: Sort; paused?: boolean; } @@ -122,7 +123,7 @@ export type UseSearchReturnType = [SearchResultState, () => Promise]; * @param {boolean} [options.paused=false] - Whether the search is paused. * @returns {UseSearchReturnType} - The search result state and a function to fetch more results. 
*/ -export function useSearch({ query, sort, paused = false }: UseSearchProps): UseSearchReturnType { +export function useSearch({ query, useNewSearch = true, sort, paused = false }: UseSearchProps): UseSearchReturnType { const [fetching, setFetching] = useState(true); const [suggestedFilters, setSuggestedFilters] = useState(); @@ -142,7 +143,7 @@ export function useSearch({ query, sort, paused = false }: UseSearchProps): UseS } // Generate a unique ID for this request so we can ignore old responses - const requestId = `r||${query}||${sort}||${from.current}`; + const requestId = `r||${query}||${sort}||${from.current}||${useNewSearch}`; if (latestRequest.current === requestId) { // Ignore duplicate requests // The infinite scroll component may trigger multiple requests for the same page @@ -155,6 +156,7 @@ export function useSearch({ query, sort, paused = false }: UseSearchProps): UseS .query(INSIGHTS_QUERY, { search: { query: query || '', + useNewSearch, sort: sort && [sort], paging: { from: from.current, @@ -188,7 +190,7 @@ export function useSearch({ query, sort, paused = false }: UseSearchProps): UseS // Done! 
setFetching(false); - }, [paused, query, sort]); + }, [paused, query, useNewSearch, sort]); useEffect(() => { // Reset the scroll state whenever query/sort changes diff --git a/packages/frontend/src/store/search.slice.ts b/packages/frontend/src/store/search.slice.ts index 91bd9f04d..b757036f6 100644 --- a/packages/frontend/src/store/search.slice.ts +++ b/packages/frontend/src/store/search.slice.ts @@ -28,6 +28,7 @@ export interface SearchOptions { export interface SearchState { query: string; + useNewSearch: boolean; sort?: Sort; paging?: Paging; @@ -43,6 +44,7 @@ export interface SearchState { const initialState: SearchState = { query: '', + useNewSearch: true, sort: undefined, paging: undefined, isFiltered: false, @@ -62,6 +64,9 @@ export const searchSlice = createSlice({ setQuery(state, action: PayloadAction) { state.query = action.payload; }, + setUseNewSearch(state, action: PayloadAction) { + state.useNewSearch = action.payload; + }, setSort(state, action: PayloadAction) { state.sort = action.payload; }, @@ -105,7 +110,10 @@ export const searchSlice = createSlice({ }, parseUrlIntoState( state, - action: PayloadAction<{ query: string | undefined; searchParams: { sort?: string; dir?: string } }> + action: PayloadAction<{ + query: string | undefined; + searchParams: { sort?: string; dir?: string; legacySearch?: boolean }; + }> ) { const { query, searchParams } = action.payload; // Take URL only if not empty @@ -127,6 +135,11 @@ export const searchSlice = createSlice({ modified = true; } + if (searchParams.legacySearch) { + state.useNewSearch = false; + modified = true; + } + // If query provided but sort not modified, clear the existing sort options if (query && !modified) { state.sort = undefined; diff --git a/packages/shared/src/search.ts b/packages/shared/src/search.ts index bcc2fe1e6..be2d4de8d 100644 --- a/packages/shared/src/search.ts +++ b/packages/shared/src/search.ts @@ -457,7 +457,6 @@ const lang = Parsimmon.createLanguage({ */ export function 
parseSearchQuery(searchQuery: string): SearchClause[] { const clauses: SearchClause[] = lang.Query.tryParse(searchQuery); - return clauses; } diff --git a/packages/shared/src/search2.ts b/packages/shared/src/search2.ts new file mode 100644 index 000000000..e742073c9 --- /dev/null +++ b/packages/shared/src/search2.ts @@ -0,0 +1,564 @@ +/** + * Copyright 2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import type { SearchQuery } from '@iex/models/elasticsearch'; +import { isObject } from 'lodash'; +import isArray from 'lodash/isArray'; +import mergeWith from 'lodash/mergeWith'; +import Parsimmon, { optWhitespace } from 'parsimmon'; + +/** + * Translates from user-facing search keys to Elasticsearch fields. + * + * @param key User-provided key. 
/**
 * User-facing search keys and the Elasticsearch fields they resolve to.
 * Keys that are not listed here are used verbatim as the field name.
 */
const FIELD_ALIASES: ReadonlyMap<string, string> = new Map([
  ['tag', 'tags.keyword'],
  ['author', 'contributors.userName.keyword'],
  ['user', 'user.userName.keyword'],
  ['targetUser', 'details.userName.keyword'],
  ['team', 'metadata.team.keyword'],
  ['createdDate', 'createdAt'],
  ['updatedDate', 'updatedAt'],
  ['publishedDate', 'metadata.publishedDate'],
  ['itemType', 'itemType'],
  ['insight', 'details.insightName.keyword']
]);

/**
 * User-facing comparison operators and the Elasticsearch range keywords they resolve to.
 */
const RANGE_OPERATORS: ReadonlyMap<string, string> = new Map([
  ['>', 'gt'],
  ['>=', 'gte'],
  ['<', 'lt'],
  ['<=', 'lte']
]);

/**
 * Translates from user-facing search keys to Elasticsearch fields.
 *
 * @param key User-provided key.
 * @returns The mapped Elasticsearch field, or `key` itself when no mapping exists.
 */
function convertField(key: string): string {
  return FIELD_ALIASES.get(key) ?? key;
}

/**
 * Translates from user-facing operations to Elasticsearch operations.
 *
 * @param operation User-provided operation.
 * @returns The mapped range keyword, or `operation` itself when no mapping exists.
 */
function convertOperation(operation: string): string {
  return RANGE_OPERATORS.get(operation) ?? operation;
}

/**
 * Builds the field list shared by both free-text `multi_match` queries.
 * Only the (boosted) name field differs between the two queries.
 *
 * @param nameField Boosted name field to search, e.g. `name^3`.
 */
function matchFields(nameField: string): string[] {
  return [
    'description',
    nameField,
    'fullName',
    'tags^2',
    'readme.contents',
    '_collaborators.user.userName',
    '_collaborators.user.displayName',
    'contributors.userName',
    'contributors.displayName',
    'files.path',
    'files.contents',
    'files.mimeType',
    'metadata.team'
  ];
}

/**
 * A parsed piece of a search query.
 * Each clause can render itself as an Elasticsearch query fragment or back into query text.
 */
export interface SearchClause {
  toElasticsearch(): any;
  toString(): string;
}

/**
 * Free-text words. Produces two `should` queries over the same fields:
 * a fuzzy `best_fields` match and a `phrase_prefix` match.
 */
export class SearchMatch implements SearchClause {
  constructor(public value: string) {}

  toElasticsearch(): any {
    return {
      bool: {
        minimum_should_match: 1,
        should: [
          {
            multi_match: {
              query: this.value,
              fields: matchFields('name.simple^3'),
              type: 'best_fields',
              fuzziness: 'AUTO',
              analyzer: 'simple'
            }
          },
          {
            multi_match: {
              query: this.value,
              fields: matchFields('name^3'),
              type: 'phrase_prefix',
              slop: 2,
              analyzer: 'standard'
            }
          }
        ]
      }
    };
  }

  toString(): string {
    return this.value;
  }
}

/**
 * A quoted phrase. Produces a `phrase` query across all fields.
 */
export class SearchPhrase implements SearchClause {
  constructor(public value: string) {}

  toElasticsearch(): any {
    return {
      bool: {
        minimum_should_match: 1,
        should: [
          {
            multi_match: {
              query: this.value,
              fields: '*',
              type: 'phrase'
            }
          }
        ]
      }
    };
  }

  toString(): string {
    return '"' + this.value + '"';
  }
}

/**
 * A `key:value` filter, including the `@author` and `#tag` shorthands.
 */
export class SearchTerm implements SearchClause {
  constructor(
    public key: string,
    public value: string
  ) {}

  toElasticsearch(): any {
    if (this.value === '*') {
      // Wildcard query, just ignore the entire term
      return {};
    }

    if (this.key === 'author') {
      // Special case for supporting either username or display name
      const authorFields = ['contributors.userName.keyword', 'contributors.displayName.keyword'];
      return {
        bool: {
          filter: [
            {
              bool: {
                should: authorFields.map((field) => ({ term: { [field]: { value: this.value } } }))
              }
            }
          ]
        }
      };
    }

    return {
      bool: {
        filter: [{ term: { [convertField(this.key)]: { value: this.value } } }]
      }
    };
  }

  /**
   * Renders the term back into query text.
   *
   * The `@`/`#` shorthands end at the first whitespace when parsed, so any value containing
   * a space is written in the quoted `key:"value"` form instead. This keeps the output
   * re-parseable into the same term.
   */
  toString(): string {
    const quoted = `${this.key}:"${this.value}"`;
    if (this.value.includes(' ')) {
      return quoted;
    }

    switch (this.key) {
      case 'author': {
        return `@${this.value}`;
      }
      case 'tag': {
        return `#${this.value}`;
      }
      default: {
        return `${this.key}:${this.value}`;
      }
    }
  }
}

/**
 * A `key:{a,b,c}` filter matching any of the listed values.
 */
export class SearchMultiTerm implements SearchClause {
  constructor(
    public key: string,
    public values: string[]
  ) {}

  toElasticsearch(): any {
    return {
      bool: {
        filter: [{ terms: { [convertField(this.key)]: this.values } }]
      }
    };
  }

  toString(): string {
    // Values containing a space are quoted so they survive re-parsing
    const rendered = this.values.map((v) => (v.includes(' ') ? `"${v}"` : v));
    return `${this.key}:{${rendered.join(',')}}`;
  }
}

/**
 * A one-sided range filter such as `updatedDate:>=2020-03-01`.
 */
export class SearchRange implements SearchClause {
  constructor(
    public key: string,
    public operation: string,
    public value: string
  ) {}

  toElasticsearch(): any {
    return {
      bool: {
        filter: [
          {
            range: {
              [convertField(this.key)]: {
                [convertOperation(this.operation)]: this.value
              }
            }
          }
        ]
      }
    };
  }

  toString(): string {
    return `${this.key}:${this.operation}${this.value}`;
  }
}

/**
 * An inclusive two-sided range filter such as `updatedDate:[2020-03-01 to 2020-10-01]`.
 */
export class SearchCompoundRange implements SearchClause {
  constructor(
    public key: string,
    public from: string,
    public to: string
  ) {}

  toElasticsearch(): any {
    return {
      bool: {
        filter: [
          {
            range: {
              [convertField(this.key)]: {
                gte: this.from,
                lte: this.to
              }
            }
          }
        ]
      }
    };
  }

  toString(): string {
    return `${this.key}:[${this.from} to ${this.to}]`;
  }
}

/**
 * Wraps already-built Elasticsearch clauses in a filter where any one of them may match.
 * It has no query-text form, so `toString()` always throws.
 */
export class SearchNestedOrFilter implements SearchClause {
  constructor(protected clauses: any[]) {}

  toElasticsearch() {
    return {
      bool: {
        filter: [
          {
            bool: {
              should: this.clauses
            }
          }
        ]
      }
    };
  }

  toString(): string {
    throw new Error('Method not implemented.');
  }
}
Parsimmon.regexp(/[^\s:]+/i); + }, + Words: (r) => { + return ( + r.Word + // Skip words immediately followed by a filter separator + // These are a type of Term + .notFollowedBy(r.FilterSeparator) + // Combine multiple words into one, separated by whitespace + // Must have at least one word + .sepBy1( + Parsimmon.whitespace.notFollowedBy( + // Stop combining words if we see any of these terms + // It will be parsed as a separate Token + Parsimmon.alt(r.String, r.AuthorTerm, r.TagTerm) + ) + ) + .map((words) => words.join(' ')) + ); + }, + CompoundRangeWord: () => { + return Parsimmon.regexp(/[^\s:[\]]+/i).fallback(''); + }, + MultiTermWord: () => { + return Parsimmon.regexp(/[^\s,:{}]+/i).fallback(''); + }, + String: () => { + // One of possible quotes, then sequence of anything + // except that quote (unless escaped), then the same quote + return Parsimmon.oneOf(`"'`).chain(function (q) { + return Parsimmon.alt( + Parsimmon.noneOf(`\\${q}`).atLeast(1).tie(), // everything but quote and escape sign + Parsimmon.string(`\\`).then(Parsimmon.any) // escape sequence like \" + ) + .many() + .tie() + .skip(Parsimmon.string(q)); + }); + }, + Match: (r) => { + return r.Words.map((value) => new SearchMatch(value)); + }, + Phrase: (r) => { + return r.String.map((s) => new SearchPhrase(s)); + }, + FilterSeparator: () => { + return Parsimmon.string(':'); + }, + AuthorTerm: (r) => { + return Parsimmon.string('@') + .then(r.Word.fallback('')) + .map((author) => { + return new SearchTerm('author', author); + }); + }, + TagTerm: (r) => { + return Parsimmon.string('#') + .then(r.Word.fallback('')) + .map((tag) => { + return new SearchTerm('tag', tag); + }); + }, + GenericTerm: (r) => { + return Parsimmon.seq(r.Word, r.FilterSeparator.then(Parsimmon.alt(r.String, r.Word.fallback('')))).map( + ([key, value]) => { + return new SearchTerm(key, value); + } + ); + }, + Term: (r) => { + return Parsimmon.alt(r.AuthorTerm, r.TagTerm, r.GenericTerm); + }, + MultiTerm: (r) => { + return 
Parsimmon.seq( + r.Word, + r.FilterSeparator.then( + Parsimmon.lookahead(/{(.*)}/).then( + Parsimmon.alt(r.String, r.MultiTermWord.fallback('')) + .trim(optWhitespace) + .sepBy(Parsimmon.string(',').trim(optWhitespace)) + .wrap(Parsimmon.string('{'), Parsimmon.string('}')) + ) + ) + ).map(([key, values]) => { + return new SearchMultiTerm(key, values); + }); + }, + RangeOperation: () => { + return Parsimmon.alt(Parsimmon.string('>='), Parsimmon.string('>'), Parsimmon.string('<='), Parsimmon.string('<')); + }, + Range: (r) => { + return Parsimmon.seq(r.Word, r.FilterSeparator.then(Parsimmon.seq(r.RangeOperation, r.Word.fallback('')))).map( + ([key, [operation, value]]) => { + return new SearchRange(key, operation, value); + } + ); + }, + CompoundRange: (r) => { + return Parsimmon.seq( + r.Word, + r.FilterSeparator.then( + Parsimmon.lookahead(/\[(.*)]/).then( + Parsimmon.seq(r.CompoundRangeWord, Parsimmon.string('to').trim(optWhitespace).then(r.CompoundRangeWord)).wrap( + Parsimmon.string('['), + Parsimmon.string(']') + ) + ) + ) + ).map(([key, [from, to]]) => { + return new SearchCompoundRange(key, from, to); + }); + }, + Token: (r) => { + return Parsimmon.alt(r.Phrase, r.CompoundRange, r.Range, r.MultiTerm, r.Term, r.Match); + }, + Query: (r) => { + // Query is a sequence of tokens separated by whitespace + return r.Token.sepBy(optWhitespace).trim(optWhitespace); + } +}); + +/** + * Parse a search query into a series of SearchClause objects. 
/**
 * Parse a search query into a series of SearchClause objects.
 *
 * @param searchQuery Query text
 * @returns The parsed clauses, in the order they appear in the query
 * @throws When the text cannot be parsed by the grammar (Parsimmon's `tryParse` throws on failure)
 */
export function parseSearchQuery(searchQuery: string): SearchClause[] {
  const clauses: SearchClause[] = lang.Query.tryParse(searchQuery);
  return clauses;
}

/**
 * Customizer for lodash `mergeWith` that concatenates arrays instead of merging them by index,
 * so the `filter` and `should` arrays from separate clauses accumulate.
 *
 * Returning `undefined` hands the value back to lodash's default merge behaviour.
 */
const mergeCustomizer = (objectValue: any, sourceValue: any): any => {
  if (isArray(objectValue)) {
    if (isArray(sourceValue)) {
      // Concat arrays together
      return [...objectValue, ...sourceValue];
    }
    // Never spread a non-array: null/undefined would throw and a string would split into characters.
    // Keep what has been accumulated and append a lone value if there is one.
    return sourceValue == null ? objectValue : [...objectValue, sourceValue];
  }
  if (isObject(objectValue)) {
    return mergeWith(objectValue, sourceValue, mergeCustomizer);
  }
  return undefined;
};

/**
 * Combine the Elasticsearch fragments of all clauses into a single query.
 *
 * @param clauses Parsed (and optionally modified) clauses
 * @returns The merged query; an empty object when there are no clauses
 */
export function toElasticsearch(clauses: SearchClause[]): SearchQuery {
  // Reduce all fragments into a combined Elasticsearch query
  return clauses
    .map((clause) => clause.toElasticsearch())
    .reduce((combined, fragment) => mergeWith(combined, fragment, mergeCustomizer), {});
}

/**
 * Render clauses back into query text, separated by single spaces.
 *
 * @param clauses Clauses to render
 */
export function toSearchQuery(clauses: SearchClause[]): string {
  return clauses.map((clause) => clause.toString()).join(' ');
}

/**
 * Parse a search query into an Elasticsearch query.
 *
 * @param searchQuery Query text
 * @param modifier Optional function that can modify the parsed clauses before converting
 * @returns The combined Elasticsearch query
 */
export function parseToElasticsearch(
  searchQuery: string,
  modifier?: (clauses: SearchClause[]) => SearchClause[]
): SearchQuery {
  const parsed = parseSearchQuery(searchQuery);

  // Allow the modifier to update things; it receives a copy of the parsed list
  const clauses = modifier ? modifier([...parsed]) : parsed;

  return toElasticsearch(clauses);
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* eslint-disable no-console */ + +import { describe, expect, test } from 'vitest'; + +import { + parseSearchQuery, + parseToElasticsearch, + toElasticsearch, + toSearchQuery, + SearchCompoundRange, + SearchMatch, + SearchMultiTerm, + SearchPhrase, + SearchRange, + SearchTerm +} from '../src/search2'; + +describe('search', () => { + describe('parseSearchQuery', () => { + test('single word', () => { + const clauses: any[] = parseSearchQuery('avocado'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('avocado'); + }); + test('mixed-case word', () => { + const clauses: any[] = parseSearchQuery('Hipster'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('Hipster'); + }); + test('untrimmed word', () => { + const clauses: any[] = parseSearchQuery(' avocado '); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('avocado'); + }); + test('two words', () => { + const clauses: any[] = parseSearchQuery('avocado toast'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('avocado toast'); + }); + test('number as word', () => { + const clauses: any[] = parseSearchQuery('42'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('42'); + }); + test('symbols in words', () => { + const clauses: any[] = parseSearchQuery("1st 
Bank's $50"); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe("1st Bank's $50"); + }); + test('single term', () => { + const clauses: any[] = parseSearchQuery('tag:hotels'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].value).toBe('hotels'); + }); + test('two term', () => { + const clauses: any[] = parseSearchQuery('tag:hotels user:msmith'); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[1]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].value).toBe('hotels'); + expect(clauses[1].key).toBe('user'); + expect(clauses[1].value).toBe('msmith'); + }); + test('quoted term', () => { + const clauses: any[] = parseSearchQuery('team:"Mergers & Acquisitions"'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('team'); + expect(clauses[0].value).toBe('Mergers & Acquisitions'); + }); + test('uncompleted term', () => { + const clauses: any[] = parseSearchQuery('tag:'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].value).toBe(''); + }); + test('word and term', () => { + const clauses: any[] = parseSearchQuery('insights user:msmith'); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[1]).toBeInstanceOf(SearchTerm); + expect(clauses[0].value).toBe('insights'); + expect(clauses[1].key).toBe('user'); + expect(clauses[1].value).toBe('msmith'); + }); + test('terms', () => { + const clauses: any[] = parseSearchQuery('tag:{hotel,flight}'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMultiTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].values).toEqual(['hotel', 'flight']); + 
}); + test('quoted terms', () => { + const clauses: any[] = parseSearchQuery('tag:{"cellar door","auspicious bird"}'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMultiTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].values).toEqual(['cellar door', 'auspicious bird']); + }); + test('whitespace terms', () => { + const clauses: any[] = parseSearchQuery('tag:{hotel, flight}'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMultiTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].values).toEqual(['hotel', 'flight']); + }); + test('extra whitespace terms', () => { + const clauses: any[] = parseSearchQuery('tag:{ hotel , flight , cars }'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMultiTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].values).toEqual(['hotel', 'flight', 'cars']); + }); + test('double-quoted phrase', () => { + const clauses: any[] = parseSearchQuery('"powered by analysts"'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchPhrase); + expect(clauses[0].value).toBe('powered by analysts'); + }); + test('single-quoted phrase', () => { + const clauses: any[] = parseSearchQuery("'powered by analysts'"); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchPhrase); + expect(clauses[0].value).toBe('powered by analysts'); + }); + test('two phrases', () => { + const clauses: any[] = parseSearchQuery('"powered by analysts" "made with love"'); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchPhrase); + expect(clauses[0].value).toBe('powered by analysts'); + expect(clauses[1]).toBeInstanceOf(SearchPhrase); + expect(clauses[1].value).toBe('made with love'); + }); + test('word phrase term', () => { + const clauses: any[] = parseSearchQuery('tag:cars "booking growth" infographics'); + expect(clauses).toHaveLength(3); + 
expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('tag'); + expect(clauses[0].value).toBe('cars'); + expect(clauses[1]).toBeInstanceOf(SearchPhrase); + expect(clauses[1].value).toBe('booking growth'); + expect(clauses[2]).toBeInstanceOf(SearchMatch); + expect(clauses[2].value).toBe('infographics'); + }); + test('one unclosed phrase', () => { + const clauses: any[] = parseSearchQuery('"powered by analysts'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('"powered by analysts'); + }); + test('one unclosed phrase (trailing)', () => { + const clauses: any[] = parseSearchQuery('powered by analysts"'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('powered by analysts"'); + }); + test('mixed unclosed phrase', () => { + const clauses: any[] = parseSearchQuery('powered by "analysts'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchMatch); + expect(clauses[0].value).toBe('powered by "analysts'); + }); + test('empty phrase', () => { + const clauses: any[] = parseSearchQuery('""'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchPhrase); + expect(clauses[0].value).toBe(''); + }); + test('adjoined unclosed phrase', () => { + const clauses: any[] = parseSearchQuery('"test search""'); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchPhrase); + expect(clauses[0].value).toBe('test search'); + }); + test('@author', () => { + const clauses: any[] = parseSearchQuery('@username'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('author'); + expect(clauses[0].value).toBe('username'); + }); + test('#tag', () => { + const clauses: any[] = parseSearchQuery('#myTag'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + 
expect(clauses[0].key).toBe('tag'); + expect(clauses[0].value).toBe('myTag'); + }); + test('range', () => { + const clauses: any[] = parseSearchQuery('updatedDate:>2020-03-01'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchRange); + expect(clauses[0].key).toBe('updatedDate'); + expect(clauses[0].operation).toBe('>'); + expect(clauses[0].value).toBe('2020-03-01'); + }); + test('two date ranges', () => { + const clauses: any[] = parseSearchQuery('updatedDate:>=2020-03-01 updatedDate:<=2020-10-01'); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchRange); + expect(clauses[0].key).toBe('updatedDate'); + expect(clauses[0].operation).toBe('>='); + expect(clauses[0].value).toBe('2020-03-01'); + expect(clauses[1]).toBeInstanceOf(SearchRange); + expect(clauses[1].key).toBe('updatedDate'); + expect(clauses[1].operation).toBe('<='); + expect(clauses[1].value).toBe('2020-10-01'); + }); + test('two other ranges', () => { + const clauses: any[] = parseSearchQuery('publishedDate:2020-10-01'); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchRange); + expect(clauses[0].key).toBe('publishedDate'); + expect(clauses[0].operation).toBe('<'); + expect(clauses[0].value).toBe('now-2d'); + expect(clauses[1]).toBeInstanceOf(SearchRange); + expect(clauses[1].key).toBe('publishedDate'); + expect(clauses[1].operation).toBe('>'); + expect(clauses[1].value).toBe('2020-10-01'); + }); + test('compound range', () => { + const clauses: any[] = parseSearchQuery('updatedDate:[2020-03-01 to 2020-10-01]'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchCompoundRange); + expect(clauses[0].key).toBe('updatedDate'); + expect(clauses[0].from).toBe('2020-03-01'); + expect(clauses[0].to).toBe('2020-10-01'); + }); + test('compound range with relative dates', () => { + const clauses: any[] = parseSearchQuery('updatedDate:[now-1d/d to now]'); + expect(clauses).toHaveLength(1); + 
expect(clauses[0]).toBeInstanceOf(SearchCompoundRange); + expect(clauses[0].key).toBe('updatedDate'); + expect(clauses[0].from).toBe('now-1d/d'); + expect(clauses[0].to).toBe('now'); + }); + test('incomplete compound range', () => { + const clauses: any[] = parseSearchQuery('updatedDate:[2020-03-01]'); + expect(clauses).toHaveLength(1); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('updatedDate'); + expect(clauses[0].value).toBe('[2020-03-01]'); + }); + test('unclosed compound range', () => { + const clauses: any[] = parseSearchQuery('updatedDate:[2020-03-01 to 2020-10-01'); + console.log(`unclosed compound range clauses is ${JSON.stringify(clauses, null, 2)}`); + expect(clauses).toHaveLength(2); + expect(clauses[0]).toBeInstanceOf(SearchTerm); + expect(clauses[0].key).toBe('updatedDate'); + expect(clauses[0].value).toBe('[2020-03-01'); + expect(clauses[1]).toBeInstanceOf(SearchMatch); + expect(clauses[1].value).toBe('to 2020-10-01'); + }); + }); + describe('toElasticsearch', () => { + test('word', () => { + const clauses: any[] = parseSearchQuery('avocado'); + const es = toElasticsearch(clauses); + expect(es).toMatchObject({ + bool: { + minimum_should_match: 1, + should: [ + { multi_match: { query: 'avocado', type: 'best_fields' } }, + { multi_match: { query: 'avocado', type: 'phrase_prefix' } } + ] + } + }); + }); + test('two words', () => { + const clauses: any[] = parseSearchQuery('avocado toast'); + const es = toElasticsearch(clauses); + expect(es).toMatchObject({ + bool: { + minimum_should_match: 1, + should: [ + { multi_match: { query: 'avocado toast', type: 'best_fields' } }, + { multi_match: { query: 'avocado toast', type: 'phrase_prefix' } } + ] + } + }); + }); + test('word and tag', () => { + const clauses: any[] = parseSearchQuery('tag:demo insight'); + const es = toElasticsearch(clauses); + expect(es).toMatchObject({ + bool: { + minimum_should_match: 1, + filter: [{ term: { 'tags.keyword': { value: 'demo' } } }], + 
should: [ + { multi_match: { query: 'insight', type: 'best_fields' } }, + { multi_match: { query: 'insight', type: 'phrase_prefix' } } + ] + } + }); + }); + test('terms', () => { + const clauses: any[] = parseSearchQuery('itemType:{insight,page}'); + const es = toElasticsearch(clauses); + expect(es).toMatchObject({ + bool: { + filter: [{ terms: { itemType: ['insight', 'page'] } }] + } + }); + }); + test('word, phrase, tag, author', () => { + const clauses: any[] = parseSearchQuery('"best practice" #demo insight @username'); + const es = toElasticsearch(clauses); + expect(es).toMatchObject({ + bool: { + filter: [ + { term: { 'tags.keyword': { value: 'demo' } } }, + { + bool: { + should: [ + { term: { 'contributors.userName.keyword': { value: 'username' } } }, + { term: { 'contributors.displayName.keyword': { value: 'username' } } } + ] + } + } + ], + should: [ + { + multi_match: { + query: 'best practice', + type: 'phrase' + } + }, + { multi_match: { query: 'insight', type: 'best_fields' } }, + { multi_match: { query: 'insight', type: 'phrase_prefix' } } + ] + } + }); + }); + test('two date ranges', () => { + const clauses: any[] = parseSearchQuery('updatedDate:>=2020-03-01 updatedDate:<=2020-10-01'); + const es = toElasticsearch(clauses); + expect(es).toMatchObject({ + bool: { + filter: [ + { + range: { + updatedAt: { + gte: '2020-03-01' + } + } + }, + { + range: { + updatedAt: { + lte: '2020-10-01' + } + } + } + ] + } + }); + }); + }); + describe('parseToElasticsearch', () => { + test('tag term', () => { + const es = parseToElasticsearch('tag:demo'); + expect(es).toMatchObject({ + bool: { + filter: [{ term: { 'tags.keyword': { value: 'demo' } } }] + } + }); + }); + test('author term', () => { + const es = parseToElasticsearch('author:username'); + expect(es).toMatchObject({ + bool: { + filter: [ + { + bool: { + should: [ + { term: { 'contributors.userName.keyword': { value: 'username' } } }, + { term: { 'contributors.displayName.keyword': { value: 'username' } } } 
+ ] + } + } + ] + } + }); + }); + test('word, phrase, term', () => { + const es = parseToElasticsearch('"best practice" tag:demo insight'); + expect(es).toMatchObject({ + bool: { + minimum_should_match: 1, + filter: [{ term: { 'tags.keyword': { value: 'demo' } } }], + should: [ + { + multi_match: { + query: 'best practice', + type: 'phrase' + } + }, + { + multi_match: { + query: 'insight', + type: 'best_fields' + } + }, + { + multi_match: { + query: 'insight', + type: 'phrase_prefix' + } + } + ] + } + }); + }); + test('itemType terms', () => { + const es = parseToElasticsearch('itemType:{template, page}'); + expect(es).toMatchObject({ + bool: { + filter: [{ terms: { itemType: ['template', 'page'] } }] + } + }); + }); + }); + describe('toSearchQuery', () => { + test('word', () => { + const clauses: any[] = parseSearchQuery('avocado'); + const query = toSearchQuery(clauses); + expect(query).toBe('avocado'); + }); + test('two words', () => { + const clauses: any[] = parseSearchQuery('avocado toast'); + const query = toSearchQuery(clauses); + expect(query).toBe('avocado toast'); + }); + test('word and tag', () => { + const clauses: any[] = parseSearchQuery('tag:demo insight'); + const query = toSearchQuery(clauses); + expect(query).toBe('#demo insight'); + }); + test('word, phrase, tag, author', () => { + const clauses: any[] = parseSearchQuery('"best practice" #demo insight @username'); + const query = toSearchQuery(clauses); + expect(query).toBe('"best practice" #demo insight @username'); + }); + test('two date ranges', () => { + const clauses: any[] = parseSearchQuery('updatedDate:>=2020-03-01 updatedDate:<=2020-10-01'); + const query = toSearchQuery(clauses); + expect(query).toBe('updatedDate:>=2020-03-01 updatedDate:<=2020-10-01'); + }); + test('author full name', () => { + const clauses: any[] = parseSearchQuery('author:"John Doe"'); + const query = toSearchQuery(clauses); + expect(query).toBe('author:"John Doe"'); + }); + }); +});