Skip to content

Commit

Permalink
[aii] Add Assyrian Neo-Aramaic (#1784)
Browse files Browse the repository at this point in the history
  • Loading branch information
fenakhay authored Jan 29, 2025
1 parent fda354b commit 9097e68
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 0 deletions.
32 changes: 32 additions & 0 deletions ext/js/language/aii/assyrian-neo-aramaic-text-preprocessors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright (C) 2024 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {basicTextProcessorOptions} from '../text-processors.js';

/**
 * Combining marks that this module treats as optional and strips from
 * Syriac-script text. Each entry is a single code point.
 */
const optionalDiacritics = [
    // Marks from the general Combining Diacritical Marks block.
    '\u0303', '\u0304', '\u0307', '\u0308',
    '\u0323', '\u032E', '\u0330', '\u0331',
    // Contiguous run U+0730 through U+074A in the Syriac block.
    '\u0730', '\u0731', '\u0732', '\u0733', '\u0734', '\u0735', '\u0736', '\u0737',
    '\u0738', '\u0739', '\u073A', '\u073B', '\u073C', '\u073D', '\u073E', '\u073F',
    '\u0740', '\u0741', '\u0742', '\u0743', '\u0744', '\u0745', '\u0746', '\u0747',
    '\u0748', '\u0749', '\u074A',
];

// Body of the character class: every optional mark, concatenated in list order.
const diacriticsCharacterClass = optionalDiacritics.join('');

/** Global matcher for any single code point listed in `optionalDiacritics`. */
const diacriticsRegex = new RegExp(`[${diacriticsCharacterClass}]`, 'g');

/**
 * Text preprocessor that strips every character matched by `diacriticsRegex`
 * from the input when enabled, and passes the input through untouched when
 * disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeSyriacScriptDiacritics = {
    name: 'Remove diacritics',
    description: 'ܟܵܬܹܒ݂ ⬅️ ܟܬܒ',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        // Disabled: hand the text back unchanged.
        if (!setting) {
            return text;
        }
        return text.replace(diacriticsRegex, '');
    },
};
10 changes: 10 additions & 0 deletions ext/js/language/language-descriptors.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {removeSyriacScriptDiacritics} from './aii/assyrian-neo-aramaic-text-preprocessors.js';
import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js';
import {normalizeRadicalCharacters} from './CJK-util.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
Expand Down Expand Up @@ -58,6 +59,15 @@ const capitalizationPreprocessors = {

/** @type {import('language-descriptors').LanguageDescriptorAny[]} */
const languageDescriptors = [
{
iso: 'aii',
iso639_3: 'aii',
name: 'Assyrian Neo-Aramaic',
exampleText: 'ܟܵܬܹܒ݂',
textPreprocessors: {
removeSyriacScriptDiacritics,
},
},
{
iso: 'ar',
iso639_3: 'ara',
Expand Down
5 changes: 5 additions & 0 deletions types/ext/language-descriptors.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ type AlphabeticDiacriticsProcessor = {
* Any new language should be added to this object.
*/
type AllTextProcessors = {
aii: {
pre: {
removeSyriacScriptDiacritics: TextProcessor<boolean>;
};
};
ar: {
pre: {
removeArabicScriptDiacritics: TextProcessor<boolean>;
Expand Down

0 comments on commit 9097e68

Please sign in to comment.