diff --git a/src/common/fluentVowels.ts b/src/common/fluentVowels.ts index 5496dd2..8df18b8 100644 --- a/src/common/fluentVowels.ts +++ b/src/common/fluentVowels.ts @@ -1,7 +1,4 @@ -import { ALL_CHARACTERS, CONSONANT_CHARACTERS } from '../constants'; - -const LETTERS = new Set(ALL_CHARACTERS); -const CONSONANTS = new Set(CONSONANT_CHARACTERS); +import { ALL_LETTERS, ALL_CONSONANTS } from '../substitutions'; export function markFluentVowel(word: string, add: string): string { let i = 0; @@ -26,7 +23,7 @@ export function inferFluentVowel(word: string): string { while (i > 0) { const char = word[i]; - if (!LETTERS.has(char)) { + if (!ALL_LETTERS.has(char)) { end = i; replaced = false; } @@ -53,7 +50,7 @@ function replaceFluentVowel(word: string, j: number): string { } function isLastSyllable(word: string, i: number, end: number): boolean { - if (i === end - 2) return CONSONANTS.has(word[i + 1]); + if (i === end - 2) return ALL_CONSONANTS.has(word[i + 1]); if (i === end - 3) return word[i + 1] === 'n' && word[i + 2] === 'j'; return false; } diff --git a/src/constants/alphabet.ts b/src/constants/alphabet.ts deleted file mode 100644 index 1bae862..0000000 --- a/src/constants/alphabet.ts +++ /dev/null @@ -1,5 +0,0 @@ -export { - ALL_CHARACTERS, - CONSONANT_CHARACTERS, - VOWEL_CHARACTERS, -} from '../substitutions'; diff --git a/src/constants/index.ts b/src/constants/index.ts index 47771c4..acfa977 100644 --- a/src/constants/index.ts +++ b/src/constants/index.ts @@ -1,3 +1,2 @@ -export * from './alphabet'; export * from './bcp47'; export * from './glagolitic'; diff --git a/src/noun/declensionNoun.ts b/src/noun/declensionNoun.ts index 8838dd8..d170f27 100644 --- a/src/noun/declensionNoun.ts +++ b/src/noun/declensionNoun.ts @@ -5,10 +5,11 @@ import { declensionAdjective } from '../adjective'; import { inferFluentVowel, markFluentVowel } from '../common'; import type { Noun } from '../partOfSpeech'; -import { matchEnd, removeBrackets, replaceStringAt } from '../utils'; +import { removeBrackets, replaceStringAt } from '../utils'; import { establishGender } from './establishGender'; -const AEEO = ['a', 'e', 'ę', 'o']; +// endings like -i, -u are not declinable usually +const AEEO$ = /[aeęo]$/; export function declensionNounFlat( rawNoun: string, @@ -189,7 +190,7 @@ function establish_root(noun: string, gender: string) { noun.lastIndexOf('ȯ'), ); - const hasVowelEnding = matchEnd(noun, [AEEO]); + const hasVowelEnding = AEEO$.test(noun); if (noun == 'lėv' || noun == 'lev') { result = 'ljv'; @@ -283,7 +284,7 @@ function nominative_sg(noun: string, root: string, gender: string) { if (gender == 'f2') { result = root; } - if (gender == 'f3' && root.lastIndexOf('v') == root.length - 1) { + if (gender == 'f3' && root.endsWith('v')) { result = root.substring(0, root.length - 1) + 'ȯv'; } else if (gender == 'f3') { result = noun; @@ -361,7 +362,7 @@ function instrumental_sg(root: string, gender: string) { result = root + 'ojų'; } else if (gender == 'f2') { result = root + 'jų'; - } else if (gender == 'f3' && root.lastIndexOf('v') == root.length - 1) { + } else if (gender == 'f3' && root.endsWith('v')) { result = root.substring(0, root.length - 1) + 'ȯvjų'; } else if (gender == 'f3') { result = root + 'jų'; diff --git a/src/substitutions.ts b/src/substitutions.ts index b43448a..d667120 100644 --- a/src/substitutions.ts +++ b/src/substitutions.ts @@ -1,19 +1,16 @@ -export const ALL_CHARACTERS = - 'aáàăâåąāæbcćçčdďđḓeéèĕêěëėęēǝfghiíìĭîīıjĵklĺľļłŀljmnńňñņnjoóòŏôöȯǫœpqrŕṙřsśšŠtťṱuúùŭûůũųūvwxyýzźżž'.split( - '', - ); +export const ALL_LETTERS = new Set( + 'aáàăâåąāæbcćçčdďđḓeéèĕêěëėęēǝfghiíìĭîīıjĵklĺľļłŀljmnńňñņnjoóòŏôöȯǫœpqrŕṙřsśšŠtťṱuúùŭûůũųūvwxyýzźżž', +); -export const ANY = ALL_CHARACTERS; +export const ALL_CONSONANTS = new Set( + 'bcćçčdďđḓfghklĺľļłŀljmnńňñņnjpqrŕṙřsśštťṱvwxzźżž', +); -export const CONSONANT_CHARACTERS = - 'bcćçčdďđḓfghklĺľļłŀljmnńňñņnjpqrŕṙřsśštťṱvwxzźżž'.split(''); +export const ALL_VOWELS = new Set( + 'aáàăâåąāæeéèĕêěëėęēǝiíìĭîīıoóòŏôöȯǫœuúùŭûůũųūyý', +); -export const CONSONANT = CONSONANT_CHARACTERS; - -export const VOWEL_CHARACTERS = - 'aáàăâåąāæeéèĕêěëėęēǝiíìĭîīıoóòŏôöȯǫœuúùŭûůũųūyý'.split(''); - -export const VOWEL = VOWEL_CHARACTERS; +export const VOWELS = new Set('aåeęěėioȯuųy'); export const LJ_NJ = ['lj', 'nj']; export const LJj_NJj = ['lj', 'ĺj', 'ľj', 'lj', 'nj', 'ńj', 'ňj', 'ñj', 'nj']; diff --git a/src/utils/index.ts b/src/utils/index.ts index bb2b3ac..86ed9d6 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -1,7 +1,5 @@ export * from './areArraysEqual'; export * from './compactArray'; -export * from './matchStart'; -export * from './matchEnd'; export * from './memoizeLastCall'; export * from './removeBrackets'; export * from './replaceStringAt'; diff --git a/src/utils/matchEnd.test.ts b/src/utils/matchEnd.test.ts deleted file mode 100644 index 25b267b..0000000 --- a/src/utils/matchEnd.test.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { matchEnd } from './matchEnd'; - -describe('matchEnd', () => { - it('should return ending when the string ends with the given suffix array', () => { - expect(matchEnd('dělati', [['e', 'ě'], 'lat', ['i', 'ı']])).toBe('ělati'); - }); - - it('should return empty string when the string ends with an empty suffix array', () => { - expect(matchEnd('', [])).toBe(''); - }); - - it('should return empty string when the string does not match any suffix array', () => { - expect(matchEnd('dělåti', [['ě', 'e'], 'lat', ['i', 'ı']])).toBe(''); - }); - - it('should pick first working suffix among others', () => { - expect(matchEnd('abcdefg', ['de', ['fg', 'g']])).toBe('defg'); - expect(matchEnd('abcdefg', ['de', ['g', 'fg']])).toBe(''); - }); - - it('should optimize for single suffix array and single string', () => { - expect(matchEnd('dělati', ['i'])).toBe('i'); - expect(matchEnd('dělati', 'ti')).toBe('ti'); - }); -}); diff --git a/src/utils/matchEnd.ts b/src/utils/matchEnd.ts deleted file mode 100644 index 6f6dde3..0000000 --- a/src/utils/matchEnd.ts +++ /dev/null @@ -1,51 +0,0 @@ -export function matchEnd( - str: string, - suffixes: string | Array, -): string { - if (!Array.isArray(suffixes)) { - return str.endsWith(suffixes) ? suffixes : ''; - } - - if (suffixes.length === 1 && !Array.isArray(suffixes[0])) { - return matchEnd(str, suffixes[0]); - } - - const S = suffixes.length; - let end = str.length; - let ending = ''; - - for (let k = S - 1; k >= 0; k--) { - const start = findSuffixesIndex(str, suffixes[k], end); - if (start === -1) { - return ''; - } - - ending = str.slice(start, end) + ending; - end = start; - } - - return ending; -} - -function findSuffixesIndex( - str: string, - suffixes: string | string[], - end: number, -): number { - let cache = ''; - let len = 0; - - const arr = Array.isArray(suffixes) ? suffixes : [suffixes]; - for (const s of arr) { - if (len !== s.length) { - len = s.length; - cache = str.slice(end - len, end); - } - - if (cache === s) { - return end - len; - } - } - - return -1; -} diff --git a/src/utils/matchStart.test.ts b/src/utils/matchStart.test.ts deleted file mode 100644 index 1e1f8fc..0000000 --- a/src/utils/matchStart.test.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { matchStart } from './matchStart'; - -describe('matchStart', () => { - it('should return beginning when the string starts with the given prefix array', () => { - expect(matchStart('dělati', ['d', ['e', 'ě'], 'l'])).toBe('děl'); - }); - - it('should return empty string when the string starts with an empty prefix array', () => { - expect(matchStart('', [])).toBe(''); - }); - - it('should return empty string when the string does not match any prefix array', () => { - expect(matchStart('dělåti', [['ě', 'e'], 'lat', ['i', 'ı']])).toBe(''); - }); - - it('should pick first working prefix among others', () => { - expect( - matchStart('abcdefg', [ - ['ab', 'a'], - ['b', 'cd'], - ]), - ).toBe('abcd'); - - expect( - matchStart('abcdefg', [ - ['a', 'ab'], - ['b', 'cd'], - ]), - ).toBe('ab'); - }); - - it('should optimize for single prefix array and single string', () => { - expect(matchStart('dělati', ['dě'])).toBe('dě'); - expect(matchStart('dělati', 'děl')).toBe('děl'); - }); -}); diff --git a/src/utils/matchStart.ts b/src/utils/matchStart.ts deleted file mode 100644 index 7f5a9ae..0000000 --- a/src/utils/matchStart.ts +++ /dev/null @@ -1,51 +0,0 @@ -export function matchStart( - str: string, - prefixes: string | Array, -): string { - if (!Array.isArray(prefixes)) { - return str.startsWith(prefixes) ? prefixes : ''; - } - - if (prefixes.length === 1 && !Array.isArray(prefixes[0])) { - return matchStart(str, prefixes[0]); - } - - const P = prefixes.length; - let start = 0; - let prefix = ''; - - for (let k = 0; k < P; k++) { - const end = findPrefixesIndex(str, prefixes[k], start); - if (end === -1) { - return ''; - } - - prefix += str.slice(start, end); - start = end; - } - - return prefix; -} - -function findPrefixesIndex( - str: string, - prefixes: string | string[], - start: number, -): number { - let cache = ''; - let len = 0; - - const arr = Array.isArray(prefixes) ? prefixes : [prefixes]; - for (const p of arr) { - if (len !== p.length) { - len = p.length; - cache = str.slice(start, start + len); - } - - if (cache === p) { - return start + len; - } - } - - return -1; -} diff --git a/src/verb/conjugationVerb.ts b/src/verb/conjugationVerb.ts index d0e5db2..14d0d4d 100644 --- a/src/verb/conjugationVerb.ts +++ b/src/verb/conjugationVerb.ts @@ -2,17 +2,18 @@ * @source http://steen.free.fr/interslavic/conjugator.html */ -import { compactArray, matchEnd } from '../utils'; +import { compactArray } from '../utils'; import { parsePos, Verb } from '../partOfSpeech'; -import { - BIG_YUS, - BIG_YUS_LOOSE, - IOTATED_SMALL_YUS, - SMALL_YUS, -} from '../substitutions'; +import { BIG_YUS, IOTATED_SMALL_YUS, SMALL_YUS } from '../substitutions'; -const _SE = [' se', ' se']; -const SE_ = ['sę ', 'se ']; +const _SE = /. s[eę]$/; +const SE_ = /s[eę] $/; +const EVA_OVA = /[eo]va$/; +const NUU = /..n[uų]$/; +const OUEE = /^..?[eěou]$/; +const BDSZE = /[bdsz]ję$/; +const AEE = /[aeě]$/; +const MEUU = /[meuų-]$/; const PREFIXES = [ 'prědpo', @@ -48,16 +49,7 @@ const PREFIXES = [ 'v', ]; -const NON_REGULAR_VERBS = [ - 'věděti', - 'vedeti', - 'jesti', - 'jěsti', - 'dati', - 'dųti', - 'byti', - 'žegti', -]; +const NON_REGULAR_VERBS = /(v[eě]d[eě]ti|j[eě]sti|d[aų]ti|byti|žegti)$/; const irregular_stems = { da: 1, je: 1, jě: 1, ja: 1, vě: 1 }; @@ -253,9 +245,9 @@ function splitReflexive(inf: string) { function prefix(inf: string) { // get prefixes for some non-regular verbs - const irregular = matchEnd(inf, [NON_REGULAR_VERBS]); - if (irregular) { - const maybePrefix = inf.slice(0, -irregular.length); + const match = inf.match(NON_REGULAR_VERBS); + if (match) { + const maybePrefix = inf.slice(0, -match[1].length); if (PREFIXES.includes(maybePrefix)) { return maybePrefix; } @@ -370,13 +362,13 @@ function derive_present_tense_stem(infinitive_stem_string: string): string { result = 'uměĵ'; } else if (result === 'hova') { result = 'hovaĵ'; - } else if (matchEnd(result, [['o', 'e'], 'va'])) { + } else if (EVA_OVA.test(result)) { result = result.slice(0, -3) + 'uj'; - } else if (result.length > 3 && matchEnd(result, ['n', BIG_YUS_LOOSE])) { + } else if (NUU.test(result)) { result = result.slice(0, -1); - } else if (result.length < 4 && matchEnd(result, [['o', 'u', 'e', 'ě']])) { + } else if (OUEE.test(result)) { result = result + 'j'; - } else if (matchEnd(result, [['b', 'd', 's', 'z'], IOTATED_SMALL_YUS])) { + } else if (BDSZE.test(result)) { result = result.slice(0, -2) + 'ȯjm'; } else if (result.endsWith(IOTATED_SMALL_YUS)) { result = result.slice(0, -1) + 'm'; @@ -386,7 +378,7 @@ function derive_present_tense_stem(infinitive_stem_string: string): string { result = result.slice(0, -1) /*+ 'm'*/; } else if (result.endsWith('y')) { result = result + 'j'; - } else if (matchEnd(result, [['a', 'e', 'ě']])) { + } else if (AEE.test(result)) { result = result + 'ĵ'; } return result; @@ -398,9 +390,9 @@ function present_tense_stem(pref: string, pts: string, is: string) { if (pts.length == 0) { result = derive_present_tense_stem(is); } else { - if (matchEnd(pts, _SE) && pts.length > 3) { + if (_SE.test(pts)) { pts = pts.slice(0, -3); - } else if (matchEnd(pts, SE_)) { + } else if (SE_.test(pts)) { pts = pts.slice(3); } @@ -412,7 +404,7 @@ function present_tense_stem(pref: string, pts: string, is: string) { } } - if (matchEnd(pts, [['-', 'm', 'e', 'ų', 'u']])) { + if (MEUU.test(pts)) { result = pts.slice(0, -1); } else { result = pts;