diff --git a/data/ca-ba/g2p/model.crf b/data/ca-ba/g2p/model.crf index 4be9708..5de981e 100644 Binary files a/data/ca-ba/g2p/model.crf and b/data/ca-ba/g2p/model.crf differ diff --git a/data/ca-ba/lexicon.db b/data/ca-ba/lexicon.db index 024aa7e..c961947 100644 Binary files a/data/ca-ba/lexicon.db and b/data/ca-ba/lexicon.db differ diff --git a/data/ca-ba/phonemes.txt b/data/ca-ba/phonemes.txt index fcea1d5..51bfea1 100644 --- a/data/ca-ba/phonemes.txt +++ b/data/ca-ba/phonemes.txt @@ -1,5 +1,5 @@ # https://en.wikipedia.org/wiki/Catalan_phonology -# Catalan Central accent phonemes +# Catalan phonemes p [p]ala b [b]ala diff --git a/data/ca-ce/g2p/model.crf b/data/ca-ce/g2p/model.crf index 4be9708..1e8f7a8 100644 Binary files a/data/ca-ce/g2p/model.crf and b/data/ca-ce/g2p/model.crf differ diff --git a/data/ca-ce/lexicon.db b/data/ca-ce/lexicon.db index a7679cb..f3fd9df 100644 Binary files a/data/ca-ce/lexicon.db and b/data/ca-ce/lexicon.db differ diff --git a/data/ca-ce/phonemes.txt b/data/ca-ce/phonemes.txt index fcea1d5..51bfea1 100644 --- a/data/ca-ce/phonemes.txt +++ b/data/ca-ce/phonemes.txt @@ -1,5 +1,5 @@ # https://en.wikipedia.org/wiki/Catalan_phonology -# Catalan Central accent phonemes +# Catalan phonemes p [p]ala b [b]ala diff --git a/data/ca-no/g2p/model.crf b/data/ca-no/g2p/model.crf new file mode 100644 index 0000000..f7cdbdb Binary files /dev/null and b/data/ca-no/g2p/model.crf differ diff --git a/data/ca-no/language.yml b/data/ca-no/language.yml new file mode 100644 index 0000000..413ee3b --- /dev/null +++ b/data/ca-no/language.yml @@ -0,0 +1,50 @@ +--- + +language: + name: "Nord-Occidental Catalan" + code: "ca-no" + phonemes: !env "${config_dir}/phonemes.txt" + keep_stress: true + +lexicon: !env "${config_dir}/lexicon.db" + +g2p: + model: !env "${config_dir}/g2p.fst" + +symbols: + casing: "lower" + number_regex: "^-?\\d+([,.]\\d+)*$" + token_split: "\\s+" + token_join: " " + minor_breaks: + - "," + - ":" + - ";" + - "..." + major_breaks: + - "." + - "?" + - "!" + replace: + "[\\<\\>\\(\\)\\[\\]\"]+": "" + "\\B'": "\"" + "'\\B": "\"" + "’": "'" + "'": "" + "-": "" + "l·l": "l" + punctuations: + - "\"" + - "„" + - "“" + - "”" + - "«" + - "»" + - "," + - ":" + - ";" + - "." + - "?" + - "¿" + - "!" 
+ - "¡" \ No newline at end of file diff --git a/data/ca-no/lexicon.db b/data/ca-no/lexicon.db new file mode 100644 index 0000000..6cb9e0e Binary files /dev/null and b/data/ca-no/lexicon.db differ diff --git a/data/ca-no/phonemes.txt b/data/ca-no/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/data/ca-no/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/data/ca-va/g2p/model.crf b/data/ca-va/g2p/model.crf new file mode 100644 index 0000000..417e24c Binary files /dev/null and b/data/ca-va/g2p/model.crf differ diff --git a/data/ca-va/language.yml b/data/ca-va/language.yml new file mode 100644 index 0000000..1d2074b --- /dev/null +++ b/data/ca-va/language.yml @@ -0,0 +1,50 @@ +--- + +language: + name: "Valencià Catalan" + code: "ca-va" + phonemes: !env "${config_dir}/phonemes.txt" + keep_stress: true + +lexicon: !env "${config_dir}/lexicon.db" + +g2p: + model: !env "${config_dir}/g2p.fst" + +symbols: + casing: "lower" + number_regex: "^-?\\d+([,.]\\d+)*$" + token_split: "\\s+" + token_join: " " + minor_breaks: + - "," + - ":" + - ";" + - "..." + major_breaks: + - "." + - "?" + - "!" + replace: + "[\\<\\>\\(\\)\\[\\]\"]+": "" + "\\B'": "\"" + "'\\B": "\"" + "’": "'" + "'": "" + "-": "" + "l·l": "l" + punctuations: + - "\"" + - "„" + - "“" + - "”" + - "«" + - "»" + - "," + - ":" + - ";" + - "." + - "?" + - "¿" + - "!" 
+ - "¡" \ No newline at end of file diff --git a/data/ca-va/lexicon.db b/data/ca-va/lexicon.db new file mode 100644 index 0000000..5b6518d Binary files /dev/null and b/data/ca-va/lexicon.db differ diff --git a/data/ca-va/phonemes.txt b/data/ca-va/phonemes.txt new file mode 100644 index 0000000..51bfea1 --- /dev/null +++ b/data/ca-va/phonemes.txt @@ -0,0 +1,44 @@ +# https://en.wikipedia.org/wiki/Catalan_phonology +# Catalan phonemes + +p [p]ala +b [b]ala +t [t]ela +d [d]onar +k [k]ala +ɡ [g]ala +m [m]ala +ɲ fa[ng] +β aca[b]a +ð ca[d]a +ɣ ama[g]ar +f [f]als +v a[f]ganès +s [s]ala +z ca[s]a +ʃ [x]oc +ʒ mà[g]ic +tʃ co[tx]e +dʒ me[tg]e +l [l]íquid +ʎ [ll]amp +r ca[rr]o +ɾ ca[r]a +w ve[u]en +uw ca[u]re +j ca[i]re +y [i]a[i]a +n [n]ena +ŋ pi[n]güí +ts po[ts]er +dz do[tz]e + +# Vowels +i r[i]c +e c[e]c +ɛ s[e]c +a s[a]c +ɔ f[o]c +o s[ó]c +u s[u]c +ə [a]mor \ No newline at end of file diff --git a/gruut-lang-ca/gruut_lang_ca/g2p/model.crf b/gruut-lang-ca/gruut_lang_ca/g2p/model.crf index 4be9708..1e8f7a8 100644 Binary files a/gruut-lang-ca/gruut_lang_ca/g2p/model.crf and b/gruut-lang-ca/gruut_lang_ca/g2p/model.crf differ diff --git a/gruut-lang-ca/gruut_lang_ca/lexicon.db b/gruut-lang-ca/gruut_lang_ca/lexicon.db index a7679cb..f3fd9df 100644 Binary files a/gruut-lang-ca/gruut_lang_ca/lexicon.db and b/gruut-lang-ca/gruut_lang_ca/lexicon.db differ diff --git a/gruut-lang-ca/gruut_lang_ca/phonemes.txt b/gruut-lang-ca/gruut_lang_ca/phonemes.txt index fcea1d5..51bfea1 100644 --- a/gruut-lang-ca/gruut_lang_ca/phonemes.txt +++ b/gruut-lang-ca/gruut_lang_ca/phonemes.txt @@ -1,5 +1,5 @@ # https://en.wikipedia.org/wiki/Catalan_phonology -# Catalan Central accent phonemes +# Catalan phonemes p [p]ala b [b]ala diff --git a/gruut/const.py b/gruut/const.py index 2199151..dad195e 100644 --- a/gruut/const.py +++ b/gruut/const.py @@ -19,6 +19,8 @@ "ca": "ca-ce", "ca-ce": "ca-ce", "ca-ba": "ca-ba", + "ca-no": "ca-no", + "ca-va": "ca-va", "cs": "cs-cz", "de": "de-de", "en": "en-us", diff --git a/gruut/lang.py b/gruut/lang.py index aee9fda..ccfffaa 100644 --- a/gruut/lang.py +++ b/gruut/lang.py @@ -115,7 +115,7 @@ def get_settings( # Arabic return get_ar_settings(lang_dir, **settings_args) - if lang_only in {"ca-ce", "ca-ba"}: + if lang_only in {"ca-ce", "ca-ba", "ca-no", "ca-va"}: # Catalan return get_ca_settings(lang_dir, **settings_args) @@ -835,13 +835,60 @@ def get_zh_settings(lang_dir=None, **settings_args) -> TextProcessorSettings: # Catalan (ca, Catalan) # ----------------------------------------------------------------------------- +# Pre-Process constants +# Same for all accents in this version +VOWEL_CHARS = ['a', 'ä', 'à', 'e', 'ë', 'é', 'è', 'i', 'í', 'ï', 'o', 'ö', 'ó', 'ò', 'u', 'ü', 'ú'] +ACCENTED_VOWEL_CHARS = ['à', 'é', 'è', 'í', 'ó', 'ò', 'ú'] +NUCLITIC_CHARS = ['a', 'à', 'e', 'é', 'è', 'í', 'ï', 'o', 'ó', 'ò', 'ú'] +ACCENT_CHANGES = { + "a" : "à", + "e" : "é", + "i" : "í", + "ï" : "í", + "o" : "ó", + "u" : "ú", + "ü" : "ú", +} +INSEPARABLES = [ + 'bh', 'bl', 'br', 'ch', 'cl', 'cr', 'dh', 'dj', 'dr', 'fh', 'fh', 'fl', 'fr', \ + 'gh', 'gl', 'gr', 'gu', 'gü', 'jh', 'kh', 'kl', 'kr', 'lh', 'll', 'mh', \ + 'nh', 'ny', 'ph', 'pl', 'pr', 'qu', 'qü', 'rh', 'sh', 'th', 'th', 'tr', \ + 'vh', 'wh', 'xh', 'xh', 'yh', 'zh', +] +VOC_IR = ["cuir", "vair"] +EINESGRAM = [ + '-de-', '-en', '-hi', '-ho', '-i', '-i-', '-la', '-les', '-li', '-lo', '-los', '-me', '-ne', '-nos', \ + '-se', '-te', '-us', '-vos', 'a', 'a-', 'al', 'als', 'amb', 'bi-', 'co', 'de', 'de-', 'del', 'dels', \ + 'el', 'els', 'em', 'en', 'ens', 
'es', 'et', 'hi', 'ho', 'i', 'i-', 'la', 'les', 'li', 'lo', 'ma', \ + 'me', 'mon', 'na', 'pel', 'pels', 'per', 'que', 're', 'sa', 'se', 'ses', 'si', 'sos', 'sub', \ + 'ta', 'te', 'tes', 'ton', 'un', 'uns', 'us', +] +EXCEP_ACC = { + 'antropologico': 'antropològico', 'arterio': 'artèrio', 'artistico': 'artístico', 'basquet': 'bàsquet', 'cardio': 'càrdio', \ + 'catolico': 'catòlico', 'cientifico': 'científico', 'circum': 'círcum', 'civico': 'cívico', 'democrata': 'demòcrata', \ + 'democratico': 'democràtico', 'dumping': 'dúmping', 'economico': 'econòmico', 'edgar': 'èdgar', 'fenicio': 'fenício', \ + 'filosofico': 'filosòfico', 'fisico': 'físico', 'fisio': 'físio', 'geografico': 'geogràfico', 'hetero': 'hétero', \ + 'higenico': 'higènico', 'higienico': 'higiènico', 'hiper': 'híper', 'historico': 'històrico', 'ibero': 'íbero', \ + 'ideologico': 'ideològico', 'input': 'ínput', 'inter': 'ínter', 'jonatan': 'jònatan', 'juridico': 'jurídico', 'labio': 'làbio', \ + 'linguo': 'línguo', 'literario': 'literàrio', 'logico': 'lògico', 'magico': 'màgico', 'maniaco': 'maníaco', 'marketing': 'màrketing', \ + 'oxido': 'òxido', 'petroleo': 'petròleo', 'politico': 'político', 'quantum': 'quàntum', 'quimico': 'químico', 'quimio': 'químio', \ + 'radio': 'ràdio', 'romanico': 'romànico', 'simbolico': 'simbòlico', 'socio': 'sòcio', 'super': 'súper', 'tecnico': 'tècnico', \ + 'teorico': 'teòrico', 'tragico': 'tràgico', 'traqueo': 'tràqueo', +} +DIFT_DECR = ["au", "ai", "eu", "ei", "ou", "oi", "iu", "àu", "ui"] +VOC_SOLA = ["a", "e", "i", "o", "u", "ï", "ü"] +VOC_MES_S = ["as", "es", "is", "os", "us", "às", "ès"] +EN_IN = ["en", "in", "àn"] + # Pre-Process functions and classes from collections import deque +# TODO review all functions, may need refactor +# TODO define depending the dialect def vocal(carac: str) -> bool: - vocal_chars = ['a', 'à', 'e', 'é', 'è', 'i', 'í', 'ï', 'o', 'ó', 'ò', 'u', 'ü', 'ú'] - return carac in vocal_chars + + return carac in VOWEL_CHARS def acaba_en_vocal(prefix: str) -> bool: darrer = prefix[-1] @@ -863,8 +910,7 @@ def post_prefix_ok(resta: str) -> bool: return False def nuclitica(carac: str) -> bool: - nuclitic_chars = ['a', 'à', 'e', 'é', 'è', 'í', 'ï', 'o', 'ó', 'ò', 'ú'] - return carac in nuclitic_chars + return carac in NUCLITIC_CHARS def gicf_suf(mot: str, pos: int, mots_voc_ir: typing.List[str]) -> bool: @@ -1106,19 +1152,12 @@ def __init__(self, mot: str, es_adverbi: bool): self.load_insep() - def load_insep(self): # Set self.insep_ and self.mots_voc_ir_ - self.insep_ = [ - 'bh', 'bl', 'br', 'ch', 'cl', 'cr', 'dh', 'dj', 'dr', 'fh', 'fh', 'fl', 'fr', \ - 'gh', 'gl', 'gr', 'gu', 'gü', 'jh', 'kh', 'kl', 'kr', 'lh', 'll', 'mh', \ - 'nh', 'ny', 'ph', 'pl', 'pr', 'qu', 'qü', 'rh', 'sh', 'th', 'th', 'tr', \ - 'vh', 'wh', 'xh', 'xh', 'yh', 'zh', - ] - self.mots_voc_ir_ = ["cuir", "vair"] - + self.insep_ = INSEPARABLES + self.mots_voc_ir_ = VOC_IR def troba_nuclis_mot(self): @@ -1364,11 +1403,9 @@ def troba_nuclis_mot(self): mida = len(self.el_mot) self.pos_nuclis.append(mida - 3) - def inseparable(self, tros: str) -> bool: return tros in self.insep_ - def separa_sillabes(self, vec_sil: typing.List[str], els_nuclis: typing.List[int]) -> typing.Tuple[typing.List[str], typing.List[int]]: fronteres = [] @@ -1425,7 +1462,7 @@ def separa_sillabes(self, vec_sil: typing.List[str], els_nuclis: typing.List[int fronteres.append(self.pos_nuclis[i] + 3) else: - _LOGGER.info(f"No puc separar en sillabes el mot {self.el_mot}, cluster massa gran, de longitud {longi}") + _LOGGER.debug(f"No puc separar 
en sillabes el mot {self.el_mot}, cluster massa gran, de longitud {longi}") exit(1) numsil = len(fronteres) @@ -1454,25 +1491,20 @@ def separa_sillabes(self, vec_sil: typing.List[str], els_nuclis: typing.List[int return vec_sil, els_nuclis - def empty(self) -> bool: return len(self.pos_nuclis) == 0 - def mot(self) -> str: return self.el_mot - def nucli(self, i: int) -> typing.Union[int, None]: if 0 <= i < len(self.pos_nuclis): return self.pos_nuclis[i] return None - def size(self) -> int: return len(self.pos_nuclis) - def nuclis(self) -> typing.List[int]: return self.pos_nuclis @@ -1495,35 +1527,13 @@ def __init__(self, mot: str): self.carrega_einesgram() self.carrega_exc_accent() - def carrega_einesgram(self): - # Set self.einesgram_ - self.einesgram_ = [ - '-de-', '-en', '-hi', '-ho', '-i', '-i-', '-la', '-les', '-li', '-lo', '-los', '-me', '-ne', '-nos', \ - '-se', '-te', '-us', '-vos', 'a', 'a-', 'al', 'als', 'amb', 'bi-', 'co', 'de', 'de-', 'del', 'dels', \ - 'el', 'els', 'em', 'en', 'ens', 'es', 'et', 'hi', 'ho', 'i', 'i-', 'la', 'les', 'li', 'lo', 'ma', \ - 'me', 'mon', 'na', 'pel', 'pels', 'per', 'que', 're', 'sa', 'se', 'ses', 'si', 'sos', 'sub', \ - 'ta', 'te', 'tes', 'ton', 'un', 'uns', 'us', - ] - - + self.einesgram_ = EINESGRAM + def carrega_exc_accent(self): - # Set self.excep_acc (excepcions d'accentuacio) - self.excep_acc = { - 'antropologico': 'antropològico', 'arterio': 'artèrio', 'artistico': 'artístico', 'basquet': 'bàsquet', 'cardio': 'càrdio', \ - 'catolico': 'catòlico', 'cientifico': 'científico', 'circum': 'círcum', 'civico': 'cívico', 'democrata': 'demòcrata', \ - 'democratico': 'democràtico', 'dumping': 'dúmping', 'economico': 'econòmico', 'edgar': 'èdgar', 'fenicio': 'fenício', \ - 'filosofico': 'filosòfico', 'fisico': 'físico', 'fisio': 'físio', 'geografico': 'geogràfico', 'hetero': 'hétero', \ - 'higenico': 'higènico', 'higienico': 'higiènico', 'hiper': 'híper', 'historico': 'històrico', 'ibero': 'íbero', \ - 'ideologico': 'ideològico', 'input': 'ínput', 'inter': 'ínter', 'jonatan': 'jònatan', 'juridico': 'jurídico', 'labio': 'làbio', \ - 'linguo': 'línguo', 'literario': 'literàrio', 'logico': 'lògico', 'magico': 'màgico', 'maniaco': 'maníaco', 'marketing': 'màrketing', \ - 'oxido': 'òxido', 'petroleo': 'petròleo', 'politico': 'político', 'quantum': 'quàntum', 'quimico': 'químico', 'quimio': 'químio', \ - 'radio': 'ràdio', 'romanico': 'romànico', 'simbolico': 'simbòlico', 'socio': 'sòcio', 'super': 'súper', 'tecnico': 'tècnico', \ - 'teorico': 'teòrico', 'tragico': 'tràgico', 'traqueo': 'tràqueo', - } - + self.excep_acc = EXCEP_ACC def normalize_word(self, word: str) -> str: @@ -1531,7 +1541,6 @@ def normalize_word(self, word: str) -> str: return word - def segmenta(self, mot: str, final: typing.List[str]) -> typing.List[str]: # Word with prefixes segmentation @@ -1624,8 +1633,7 @@ def segmenta(self, mot: str, final: typing.List[str]) -> typing.List[str]: if no_te_prefix: final.append(mot) return final - - + def tracta_prefixos(self, inici: typing.List[str], final: typing.List[str]) -> typing.List[str]: # For each start word, @@ -1637,7 +1645,6 @@ def tracta_prefixos(self, inici: typing.List[str], final: typing.List[str]) -> t return final - def parteix_mot(self): # Set parts @@ -1649,15 +1656,13 @@ def parteix_mot(self): partmot = Part(tros) self.transpart_.append(partmot) - def no_es_nom_ment(self, mot: str) -> bool: if mot not in self.excepcions_gen: return True else: return False - - + def es_adverbi(self, mot: str) -> bool: pos = 0 @@ -1673,8 +1678,7 @@ 
def es_adverbi(self, mot: str) -> bool: return False else: return False - - + def es_exc_accent(self, mot: str) -> str: if mot in self.excep_acc: @@ -1682,7 +1686,6 @@ def es_exc_accent(self, mot: str) -> str: return mot - def troba_nuclis_mot(self): for i in range(len(self.trossos_)): @@ -1709,16 +1712,15 @@ def troba_nuclis_mot(self): else: sillab = Sillaba(self.trossos_[i]) self.transpart_[i].push_back(sillab) - - + def dotze_term(self, pnum: int) -> bool: # retorna cert quan es mot pla (paroxiton) ja sigui per les dotze terminacions o per ser un diftong decreixent - dift_decr = ["au", "ai", "eu", "ei", "ou", "oi", "iu", "àu", "ui"] - voc_sola = ["a", "e", "i", "o", "u", "ï", "ü"] - voc_mes_s = ["as", "es", "is", "os", "us", "às", "ès"] - en_in = ["en", "in", "àn"] + dift_decr = DIFT_DECR + voc_sola = VOC_SOLA + voc_mes_s = VOC_MES_S + en_in = EN_IN numsil = self.transpart_[pnum].size() darsil = self.transpart_[pnum].transsil_[numsil - 1].get_text() @@ -1769,8 +1771,7 @@ def dotze_term(self, pnum: int) -> bool: return True return False - - + def accentua_mot(self, pnum: int): numsil = self.transpart_[pnum].size() @@ -1783,18 +1784,16 @@ def accentua_mot(self, pnum: int): # Otherwise, it's acute (aguda) self.transpart_[pnum].transsil_[numsil - 1].tonica() - def einagram(self, mot: str) -> bool: if mot not in self.einesgram_: return False else: - return True - + return True def troba_accent_tonic_mot(self): - vocaccent = ['à', 'é', 'è', 'í', 'ó', 'ò', 'ú'] + vocaccent = ACCENTED_VOWEL_CHARS for pnum in range(len(self.trossos_)): @@ -1864,29 +1863,19 @@ def troba_accent_tonic_mot(self): else: self.accentua_mot(pnum) - def sillaba_accentua_mot(self): self.parteix_mot() self.troba_nuclis_mot() self.troba_accent_tonic_mot() - def stress_tonic(self) -> str: - accent_changes = { - "a" : "à", - "e" : "é", - "i" : "í", - "ï" : "í", - "o" : "ó", - "u" : "ú", - "ü" : "ú", - } + accent_changes = ACCENT_CHANGES - all_vowels = ['a', 'à', 'e', 'é', 'è', 'i', 'í', 'ï', 'o', 'ó', 'ò', 'u', 'ü', 'ú'] - accented_vowels = ['à', 'é', 'è', 'í', 'ó', 'ò', 'ú'] - unaccented_vowels = ['a', 'e', 'i', 'ï', 'o', 'u', 'ü'] + all_vowels = VOWEL_CHARS + accented_vowels = ACCENTED_VOWEL_CHARS + unaccented_vowels = list(set(all_vowels) - set(accented_vowels)) original_word = "" stressed_word = "" @@ -1944,7 +1933,6 @@ def stress_tonic(self) -> str: return stressed_word - def stress_word(self) -> str: self.motnorm_ = self.normalize_word(self.motorig_) @@ -1959,6 +1947,8 @@ def stress_word(self) -> str: class CatalanPreProcessText: """Pre-processes text""" + # The preprocessing is the same for all accents in this version (variable lang is not used) + def __init__(self, lookup_phonemes, settings_values: dict, lang: str): self.lookup_phonemes = lookup_phonemes @@ -1983,23 +1973,38 @@ def __call__(self, text: str) -> str: preprocessed_tokens = [] for token in tokens: - if token in breaks: - processed_token = token - else: - is_in_lexicon = self.lookup_phonemes(token) is not None - if is_in_lexicon: + try: + if token in breaks: processed_token = token else: - tr = Transcripcio(token) - processed_token = tr.stress_word() + is_in_lexicon = self.lookup_phonemes(token) is not None + if is_in_lexicon: + processed_token = token + else: + tr = Transcripcio(token) + processed_token = tr.stress_word() + except: + processed_token = token + _LOGGER.debug(f"Unable to stress token {token}.") preprocessed_tokens.append(processed_token) processed_text = "".join(preprocessed_tokens) + _LOGGER.debug(f"{text} preprocessed obtaining: 
{processed_text}") + return processed_text +# Post-Process constants +# Only defined for "ca", "ca-ce" accent. +# For the rest of accents, not post-processing is done + +PHONEME_VOWELS = ["'a", "'ɛ", "'ɔ", "'e", "'i", "'o", "'u", "ə", "i", "u"] +PHONEME_STRESSED_VOWELS = ["'a", "'ɛ", "'ɔ", "'e", "'i", "'o", "'u"] +PHONEME_HIGH_VOWELS = ["i", "u", "'i", "'u"] +PHONEME_NEUTRAL_VOWELS = ["ə"] + # Post-Process functions and classes from gruut.text_processor import DATA_PROP, WordNode, BreakWordNode, BreakNode, PunctuationWordNode @@ -2020,19 +2025,16 @@ def identify_lang(nodes: typing.List[typing.Union[WordNode, BreakWordNode, Break return lang def phoneme_is_vowel(phoneme: str) -> bool: - vowels = ["'a", "'ɛ", "'ɔ", "'e", "'i", "'o", "'u", "ə", "i", "u"] - return phoneme in vowels + return phoneme in PHONEME_VOWELS def phoneme_is_stressed_vowel(phoneme: str) -> bool: - stressed_vowels = ["'a", "'ɛ", "'ɔ", "'e", "'i", "'o", "'u"] - return phoneme in stressed_vowels + return phoneme in PHONEME_STRESSED_VOWELS def phoneme_is_unstressed_vowel(phoneme: str) -> bool: return phoneme_is_vowel(phoneme) and not phoneme_is_stressed_vowel(phoneme) def phoneme_is_high_vowel(phoneme: str) -> bool: - high_vowels = ["i", "u", "'i", "'u"] - return phoneme in high_vowels + return phoneme in PHONEME_HIGH_VOWELS def phoneme_is_high_stressed_vowel(phoneme: str) -> bool: return phoneme_is_high_vowel(phoneme) and phoneme_is_stressed_vowel(phoneme) @@ -2041,106 +2043,123 @@ def phoneme_is_high_unstressed_vowel(phoneme: str) -> bool: return phoneme_is_high_vowel(phoneme) and phoneme_is_unstressed_vowel(phoneme) def phoneme_is_neutral_vowel(phoneme: str) -> bool: - neutral_vowels = ["ə"] - return phoneme in neutral_vowels + return phoneme in PHONEME_NEUTRAL_VOWELS def fusion_if_needed(node_1: WordNode, node_2: WordNode, lang: str): - if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: - return - else: + if lang in ["ca", "ca-ce"]: + if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: + return + else: - last_phoneme_word_1 = node_1.phonemes[-1] - first_phoneme_word_2 = node_2.phonemes[0] + last_phoneme_word_1 = node_1.phonemes[-1] + first_phoneme_word_2 = node_2.phonemes[0] - # Case 1: high unstressed vowel + stressed vowel of the same timbre - if phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_high_stressed_vowel(first_phoneme_word_2) \ - and last_phoneme_word_1 == first_phoneme_word_2.replace("'", ""): - # Case [i] + [i'] = [i'] or [u] + [u'] = [u'] - node_1.phonemes.pop() - _LOGGER.debug(f"FUSION CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - # Case 2: high unstressed vowel + high unstressed vowel of the same timbre - elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_high_unstressed_vowel(first_phoneme_word_2) \ - and last_phoneme_word_1 == first_phoneme_word_2: - # Case [i] + [i] = [i] or [u] + [u] = [u] - node_1.phonemes.pop() - _LOGGER.debug(f"FUSION CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - # Case 3: neutral vowel + neutral vowel (except if any of the vowels is the proposition "a") - elif phoneme_is_neutral_vowel(last_phoneme_word_1) and phoneme_is_neutral_vowel(first_phoneme_word_2) \ - and node_1.text != "a" and node_2.text != "a": - node_1.phonemes.pop() - _LOGGER.debug(f"FUSION CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + # Case 1: high unstressed vowel + stressed vowel of the same timbre + if phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and 
phoneme_is_high_stressed_vowel(first_phoneme_word_2) \ + and last_phoneme_word_1 == first_phoneme_word_2.replace("'", ""): + # Case [i] + [i'] = [i'] or [u] + [u'] = [u'] + node_1.phonemes.pop() + _LOGGER.debug(f"FUSION CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 2: high unstressed vowel + high unstressed vowel of the same timbre + elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_high_unstressed_vowel(first_phoneme_word_2) \ + and last_phoneme_word_1 == first_phoneme_word_2: + # Case [i] + [i] = [i] or [u] + [u] = [u] + node_1.phonemes.pop() + _LOGGER.debug(f"FUSION CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 3: neutral vowel + neutral vowel (except if any of the vowels is the proposition "a") + elif phoneme_is_neutral_vowel(last_phoneme_word_1) and phoneme_is_neutral_vowel(first_phoneme_word_2) \ + and node_1.text != "a" and node_2.text != "a": + node_1.phonemes.pop() + _LOGGER.debug(f"FUSION CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + else: + pass def elision_if_needed(node_1: WordNode, node_2: WordNode, lang: str): - if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: - return - else: + if lang in ["ca", "ca-ce"]: - last_phoneme_word_1 = node_1.phonemes[-1] - first_phoneme_word_2 = node_2.phonemes[0] + if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: + return + else: + + last_phoneme_word_1 = node_1.phonemes[-1] + first_phoneme_word_2 = node_2.phonemes[0] - # Case 1: stressed vowel ['a], ['ɛ] or ['ɔ] + neutral vowel (except if any of the vowels is the proposition "a") - if (phoneme_is_stressed_vowel(last_phoneme_word_1) and not phoneme_is_high_vowel(last_phoneme_word_1)) \ - and (phoneme_is_neutral_vowel(first_phoneme_word_2) and node_2.text != "a"): - node_2.phonemes.pop(0) - _LOGGER.debug(f"ELISION CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + # Case 1: stressed vowel ['a], ['ɛ], ['e], ['o] or ['ɔ] + neutral vowel (except if any of the vowels is the proposition "a") + if (phoneme_is_stressed_vowel(last_phoneme_word_1) and not phoneme_is_high_vowel(last_phoneme_word_1)) \ + and (phoneme_is_neutral_vowel(first_phoneme_word_2) and node_2.text != "a"): + node_2.phonemes.pop(0) + _LOGGER.debug(f"ELISION CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + # Case 2: neutral vowel + stressed vowel ['a], ['ɛ], ['e], ['o] or ['ɔ] + elif phoneme_is_neutral_vowel(last_phoneme_word_1) \ + and (phoneme_is_stressed_vowel(first_phoneme_word_2) and not phoneme_is_high_vowel(first_phoneme_word_2)): + node_1.phonemes.pop() + _LOGGER.debug(f"ELISION CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + else: + pass + def diphthong_if_needed(node_1: WordNode, node_2: WordNode, lang: str): - if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: - return - else: + if lang in ["ca", "ca-ce"]: - last_phoneme_word_1 = node_1.phonemes[-1] - first_phoneme_word_2 = node_2.phonemes[0] - - # Case 1: stressed vowel + high unstressed vowel - if (phoneme_is_stressed_vowel(last_phoneme_word_1) and not phoneme_is_high_vowel(last_phoneme_word_1)) \ - and phoneme_is_high_unstressed_vowel(first_phoneme_word_2): - if first_phoneme_word_2 == "i": - # Case [stressed vowel] + [i] = [stressed vowel + j], stressed vowel not 'i or 'u - node_2.phonemes[0] = "j" - _LOGGER.debug(f"DIPTHONG CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - elif first_phoneme_word_2 == "u": 
- # Case [stressed vowel] + [u] = [stressed vowel + uw], stressed vowel not 'i or 'u - node_2.phonemes[0] = "uw" - _LOGGER.debug(f"DIPTHONG CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - # Case 2: high unstressed vowel + stressed vowel - elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_stressed_vowel(first_phoneme_word_2): - if last_phoneme_word_1 == "i" and first_phoneme_word_2 not in ["'i"] and node_1.text in ["hi", "ho", "i"]: - # Case [i] + [stressed] = [y + stressed vowel], i only from "hi", "ho" or "i" - node_1.phonemes[-1] = "y" - _LOGGER.debug(f"DIPTHONG CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - elif last_phoneme_word_1 == "u" and first_phoneme_word_2 not in ["'u"] and node_1.text in ["hi", "ho", "i"]: - # Case [u] + [stressed] = [u + stressed vowel], i only from "hi", "ho" or "i" - pass - - # Case 3: unstressed vowel + high unstressed vowel - elif phoneme_is_neutral_vowel(last_phoneme_word_1) and phoneme_is_high_unstressed_vowel(first_phoneme_word_2): - if first_phoneme_word_2 == "i": - # Case [neutral vowel] + [i] = [neutral vowel + j] - node_2.phonemes[0] = "j" - _LOGGER.debug(f"DIPTHONG CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - elif first_phoneme_word_2 == "u": - # Case [neutral vowel] + [u] = [neutral vowel + uw] - node_2.phonemes[0] = "uw" - _LOGGER.debug(f"DIPTHONG CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") - - # Case 4: unstressed vowel + high unstressed vowel - elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_neutral_vowel(first_phoneme_word_2): - pass + if len(node_1.phonemes) == 0 or len(node_2.phonemes) == 0: + return + else: + + last_phoneme_word_1 = node_1.phonemes[-1] + first_phoneme_word_2 = node_2.phonemes[0] + + # Case 1: stressed vowel + high unstressed vowel + if (phoneme_is_stressed_vowel(last_phoneme_word_1) and not phoneme_is_high_vowel(last_phoneme_word_1)) \ + and phoneme_is_high_unstressed_vowel(first_phoneme_word_2): + if first_phoneme_word_2 == "i": + # Case [stressed vowel] + [i] = [stressed vowel + j], stressed vowel not 'i or 'u + node_2.phonemes[0] = "j" + _LOGGER.debug(f"DIPTHONG CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + elif first_phoneme_word_2 == "u": + # Case [stressed vowel] + [u] = [stressed vowel + uw], stressed vowel not 'i or 'u + node_2.phonemes[0] = "uw" + _LOGGER.debug(f"DIPTHONG CASE 1 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 2: high unstressed vowel + stressed vowel + elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_stressed_vowel(first_phoneme_word_2): + if last_phoneme_word_1 == "i" and first_phoneme_word_2 not in ["'i"] and node_1.text in ["hi", "ho", "i"]: + # Case [i] + [stressed] = [y + stressed vowel], i only from "hi", "ho" or "i" + node_1.phonemes[-1] = "y" + _LOGGER.debug(f"DIPTHONG CASE 2 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + elif last_phoneme_word_1 == "u" and first_phoneme_word_2 not in ["'u"] and node_1.text in ["hi", "ho", "i"]: + # Case [u] + [stressed] = [u + stressed vowel], i only from "hi", "ho" or "i" + pass + # Case 3: unstressed vowel + high unstressed vowel + elif phoneme_is_neutral_vowel(last_phoneme_word_1) and phoneme_is_high_unstressed_vowel(first_phoneme_word_2): + if first_phoneme_word_2 == "i": + # Case [neutral vowel] + [i] = [neutral vowel + j] + node_2.phonemes[0] = "j" + 
_LOGGER.debug(f"DIPTHONG CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + elif first_phoneme_word_2 == "u": + # Case [neutral vowel] + [u] = [neutral vowel + uw] + node_2.phonemes[0] = "uw" + _LOGGER.debug(f"DIPTHONG CASE 3 {node_1.text} {node_2.text}: {node_1.phonemes} {node_2.phonemes}") + + # Case 4: unstressed vowel + high unstressed vowel + elif phoneme_is_high_unstressed_vowel(last_phoneme_word_1) and phoneme_is_neutral_vowel(first_phoneme_word_2): + pass + else: + pass + def ca_post_process_sentence( graph: GraphType, sent_node: SentenceNode, settings: TextProcessorSettings ): + # Create a list of relevant nodes nodes = [] for dfs_node in nx.dfs_preorder_nodes(graph, sent_node.node): @@ -2161,7 +2180,27 @@ def ca_post_process_sentence( nodes.append(typing.cast(PunctuationWordNode, node)) lang = identify_lang(nodes) - + + # HACK + # Training corpora includes an invalid sequence of phonemes: l ʎ l + # We fix that here, in the next iteration will be properly solved + phonemes_to_fix = "l ʎ l" + fixed_phonemes = "l l" + for node in nodes: + + if node is None: + continue + + if isinstance(node, WordNode): + if not (node.text and node.phonemes): + continue + phonemes_text = " ".join(node.phonemes) + if phonemes_to_fix in phonemes_text: + phonemes_text = phonemes_text.replace(phonemes_to_fix, fixed_phonemes) + node.phonemes = phonemes_text.split(" ") + _LOGGER.debug(f"FIX: phoneme sequence '{phonemes_to_fix}' fixed at {node.text}. Fixed transcription: {node.phonemes}") + + # Create a list of contiguous word nodes contiguous_word_nodes = [] for node_1, node_2 in sliding_window(nodes, 2): @@ -2194,7 +2233,7 @@ def get_ca_settings(lang_dir=None, **settings_args) -> TextProcessorSettings: lang = "ca" lookup_phonemes = settings_args["lookup_phonemes"] - + settings_values = { "major_breaks": {".", "?", "!"}, "minor_breaks": {",", ";", ":", "..."}, @@ -2289,6 +2328,4 @@ def __call__( self.phonemizer = SqlitePhonemizer(db_conn=db_conn, **self.phonemizer_args) assert self.phonemizer is not None - return self.phonemizer(word, role=role, do_transforms=do_transforms) - - + return self.phonemizer(word, role=role, do_transforms=do_transforms) \ No newline at end of file