diff --git a/packages/orama/src/components/tokenizer/index.ts b/packages/orama/src/components/tokenizer/index.ts
index 1e5b4c4a5..82dc94f3b 100644
--- a/packages/orama/src/components/tokenizer/index.ts
+++ b/packages/orama/src/components/tokenizer/index.ts
@@ -16,15 +16,13 @@ export interface DefaultTokenizer extends Tokenizer {
 }
 
 export function normalizeToken(this: DefaultTokenizer, prop: string, token: string): string {
-  const key = `${this.language}:${prop}:${token}`
-
-  if (this.normalizationCache.has(key)) {
-    return this.normalizationCache.get(key)!
+  if (this.normalizationCache.has(token)) {
+    return this.normalizationCache.get(token)!
   }
 
   // Remove stopwords if enabled
-  if (this.stopWords?.includes(token)) {
-    this.normalizationCache.set(key, '')
+  if (this.stopWords?.has(token)) {
+    this.normalizationCache.set(token, '')
     return ''
   }
 
@@ -34,7 +32,7 @@ export function normalizeToken(this: DefaultTokenizer, prop: stri
   }
 
   token = replaceDiacritics(token)
-  this.normalizationCache.set(key, token)
+  this.normalizationCache.set(token, token)
 
   return token
 }
diff --git a/packages/orama/tests/search.test.ts b/packages/orama/tests/search.test.ts
index a4c905725..83008f7c8 100644
--- a/packages/orama/tests/search.test.ts
+++ b/packages/orama/tests/search.test.ts
@@ -725,7 +725,7 @@ t.test('search method', (t) => {
   t.test('with custom tokenizer', async (t) => {
     t.plan(4)
 
-    const normalizationCache = new Map([['english:foo:dogs', 'Dogs']])
+    const normalizationCache = new Map([['dogs', 'Dogs']])
 
     const db = await create({
       schema: {