From 9202f04f11d82f188e86d9ee7dfbce9b5d1ba1de Mon Sep 17 00:00:00 2001 From: Isaac Date: Tue, 2 Jan 2024 18:09:25 -0500 Subject: [PATCH] fix: update normalize regex to support arabic + hebrew (#74) --- src/logic.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/logic.ts b/src/logic.ts index 584c278..6d286b6 100644 --- a/src/logic.ts +++ b/src/logic.ts @@ -28,8 +28,9 @@ const normalize = (str: string | undefined) => { // Convert & to 'and' cleaned = cleaned.replace(/&/g, 'and'); - // Remove everything (including spaces) that is not a number, letter, Cyrylic alphabet, Polish alphabet - cleaned = cleaned.replace(/[^\wа-яА-ЯіїІЇ\dąćęłńóśźż\d]/g, ''); + // Remove everything (including spaces) that is not a number, a letter, or a character from the Cyrillic/Polish/Arabic/Hebrew alphabets + // (GitHub Copilot says Arabic letters range from \u0621 to \u064A and Hebrew letters range from \u05D0 to \u05EA) + cleaned = cleaned.replace(/[^\wа-яА-ЯіїІЇ\dąćęłńóśźż\u0621-\u064A\u05D0-\u05EA\d]/g, ''); // TODO: add any other logic?