Skip to content

Commit

Permalink
constants in a new file
Browse files Browse the repository at this point in the history
  • Loading branch information
kariminf committed Jul 23, 2019
1 parent 781e604 commit 06d8fb1
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 54 deletions.
17 changes: 10 additions & 7 deletions aruudy/lists/change.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@
u"ذلكم": u"ذَالِكُم",
u"الله": u"أَللَاه",
u"إله": u"إِلَاه",
u"لإله": u"لإِلَاه",
u"الإله": u"الإِلَاه",
}


def modify(word):
    """Return the special prosodic spelling of *word*, if it has one.

    The word is split into three parts: an optional sticky preposition
    (``const.SPREP``), a stem, and an optional single trailing diacritic
    (one character from ``const.DIAC``).  The stem, stripped of its
    diacritics, is looked up in ``CHANGE_LST``; on a hit the replacement is
    re-assembled with the original prefix and trailing diacritic.

    Args:
        word: a single token (the caller in the prosody module passes runs
            of non-whitespace characters).

    Returns:
        The rewritten word, or *word* unchanged when no entry applies.
    """
    # The stem must be lazy and the pattern anchored at the end; with a
    # greedy "(.*)" the stem swallows the whole word and the trailing
    # diacritic group can never capture anything.
    m = re.match(u"((?:%s)?)(.*?)([%s]?)$" % (const.SPREP, const.DIAC), word)
    if m is None:
        # e.g. the token contains a line break, which "." does not match
        return word

    prefix, stem, ending = m.groups()
    diac_class = u"[%s]" % const.DIAC

    res = CHANGE_LST.get(re.sub(diac_class, u"", stem), u"")
    if res:
        return prefix + res + ending

    # Fall back to the whole word: an entry whose first letter merely looks
    # like a sticky preposition would otherwise never be found.
    # NOTE(review): only part of CHANGE_LST is visible here; confirm whether
    # such entries exist.
    res = CHANGE_LST.get(re.sub(diac_class, u"", word), u"")
    if res:
        return res + ending

    return word
31 changes: 28 additions & 3 deletions aruudy/lists/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Short-vowel / sukuun marks and tanwiin (nunation) marks
TASHKIIL = [u"ِ", u"ُ", u"َ", u"ْ"]
TANWIIN = [u"ٍ", u"ٌ", u"ً"]
SHADDA = u"ّ"
# unknown diacritic (haraka)
UHARAKA = u"\u0653"

# All diacritics as one string, meant to be dropped into a regex character
# class ("[%s]" % DIAC): tashkiil, tanwiin, plus shadda and madda.
# Madda, in our case, is used to indicate an unknown haraka.
# Built by concatenation: chaining str.join() uses the left operand only as
# a separator, so joining a one-character string would silently drop it.
DIAC = u"".join(TASHKIIL + TANWIIN + [SHADDA, UHARAKA])

# sun letters in arabic
SUN = u"[تثدذرزسشصضطظلن]"

# Sticky prepositions (bi-, li-)kasra? or (ka-, fa-, wa-)fatha?
# kasra and fatha can be madda in case there is no tashkiil
# NOTE: this is a bare alternation; wrap it in a group when embedding it.
SPREP = u"[\u0644\u0628][\u0650%s]?|[\u0643\u0641\u0648][\u064E%s]?" % (UHARAKA, UHARAKA)

# alif in the middle of sentence
# DORJ = spaces or (bi-, li-)kasra? or (ka-, fa-, wa-)fatha?
DORJ = u"[^\\s]\\s+|%s" % SPREP

# ahruf al3illa: alif, alif maqsura, waw, yaa
ILLA = u"[اىوي]"

# Maps a short vowel (fatha, damma, kasra) to the same vowel followed by its
# long-vowel letter (alif, waw, yaa).
TATWEEL = {
    u"\u064E": u"\u064E\u0627",
    u"\u064F": u"\u064F\u0648",
    u"\u0650": u"\u0650\u064A",
}
48 changes: 16 additions & 32 deletions aruudy/poetry/prosody.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,7 @@

import re
from aruudy.poetry import meter, foot
from aruudy.lists import change

# sun letters in arabic
SUN = u"([تثدذرزسشصضطظلن])"
# alif in the middle of sentence
# DORJ = spaces or (bi-, li-)kasra? or (ka-, fa-, wa-)fatha?
DORJ = u"([^\\s]\\s+|[\u0644\u0628][\u0650]?|[\u0643\u0641\u0648][\u064E]?)"

# ahruf al3illa: alif, alif maqsura, waw, yaa
ILLA = u"[اىوي]"

from aruudy.lists import change, const

def normalize(text):
"""Normalize a given text.
Expand Down Expand Up @@ -102,10 +92,10 @@ def normalize(text):
res = re.sub(u"([^\u064B-\u0652\\s])\u064A([^\u064B-\u0652]|$)", u"\\1\u0650\u064A\\2", res)

# add Shadda to shamsi characters after al-
res = re.sub(u"(^|\\s)\u0627\u0644" + SUN + u"([^\u0651])", u"\\1\u0627\u0644\\2\u0651\\3", res)
res = re.sub(u"(^|\\s)\u0627(\u064E?)\u0644(%s)([^\u0651])" % const.SUN, u"\\1\u0627\\2\u0644\\3\u0651\\4", res)

# add madda to other characters after al-
res = re.sub(u"((?:^|\\s)\u0627\u0644[^\u0651])([^\u064E-\u0651])", u"\\1\u0653\\2", res)
res = re.sub(u"((?:^|\\s)\u0627\u0644[^\u0651])([^\u064E-\u0651])", u"\\1%s\\2" % const.UHARAKA, res)

# add kasra to li
res = re.sub(u"(^|\\s)\u0644([^\u064E-\u0652])", u"\\1\u0644\u0650\\2", res)
Expand All @@ -120,43 +110,43 @@ def normalize(text):
res = re.sub(u"(^|\\s)\u0648([^\u064E-\u0652])", u"\\1\u0648\u064E\\2", res)

# madda over alif with no vocalization
res = re.sub(u"\u0623([^\u064B-\u0653\\s])", u"\u0623\u0653\\1", res)
res = re.sub(u"\u0623([^\u064B-\u0653\\s])", u"\u0623%s\\1" % const.UHARAKA, res)

# hamza under alif with no kasra
res = re.sub(u"\u0625([^\u0650])", u"\u0625\u0650\\1", res)

#shadda not followed by a diacritic: add a madda above
res = res = re.sub(u"\u0651([^\u064B-\u0650])", u"\u0651\u0653\\1", res)
res = res = re.sub(u"\u0651([^\u064B-\u0650])", u"\u0651%s\\1" % const.UHARAKA, res)

#add madda to any leading letter except alif
res = res = re.sub(u"(^|\\s)([^\u0627])([^\u064E-\u0653])", u"\\1\\2\u0653\\3", res)
res = res = re.sub(u"(^|\\s)([^\u0627])([^\u064E-\u0653])", u"\\1\\2%s\\3" % const.UHARAKA, res)

#after sukuun must be a haraka
res = res = re.sub(u"\u0652([^\\s])([^\u064B-\u0650\\s])", u"\u0652\\1\u0653\\2", res)
res = res = re.sub(u"\u0652([^\\s])([^\u064B-\u0650\\s])", u"\u0652\\1%s\\2" % const.UHARAKA, res)

return res

# https://ar.wikipedia.org/wiki/عروض

def _prosody_del(text):
res = text

# Replace al- with sun character (it can be preceded by prepositions bi- li-)
# والصِّدق، والشَّمس ---> وصصِدق، وَششَمس
res = re.sub(DORJ + u"\u0627\u0644" + SUN , u"\\1\\2", res)
res = re.sub(u"(%s)\u0627\u0644(%s)" % (const.DORJ, const.SUN) , u"\\1\\2", res)
res = re.sub(u"\u0627\u064E\u0644(%s)" % const.SUN , u"\u0627\u064E\\1", res)

# Replace al- with l otherwise
# # والكتاب، فالعلم ---> وَلكتاب، فَلعِلم
res = re.sub(DORJ + u"\u0627(\u0644[^\u064E-\u0650])", u"\\1\\2", res)
res = re.sub(u"(%s)\u0627(\u0644[^\u064E-\u0650])" % const.DORJ, u"\\1\\2", res)


# delete first alif of a word in the middle of sentence
# فاستمعَ، وافهم، واستماعٌ، وابنٌ، واثنان ---> فَستَمَعَ، وَفهَم، وَستِماعُن، وَبنُن، وَثنانِ
res = re.sub(DORJ + u"\u0627([^\\s][^\u064B-\u0651\u0653])" , u"\\1\\2", res)
res = re.sub(u"(%s)\u0627([^\\s][^\u064B-\u0651\u0653])" % const.DORJ , u"\\1\\2", res)

# delete ending alif, waw and yaa preceding a sakin
# أتى المظلوم إلى القاضي فأنصفه قاضي العدل ---> أتَ لمظلوم إلَ لقاضي فأنصفه قاضِ لعدل.
res = re.sub(ILLA + u"\\s+(.[^\u064B-\u0651\u0653])", u" \\1", res)
res = re.sub(const.ILLA + u"\\s+(.[^\u064B-\u0651\u0653])", u" \\1", res)

# delete alif of plural masculine conjugation
# رجعوا ---> رجعو
Expand All @@ -171,12 +161,6 @@ def _prosody_del(text):

return res

tatweel = {
u"\u064E": u"\u064E\u0627",
u"\u064F": u"\u064F\u0648",
u"\u0650": u"\u0650\u064A",
}

def _prosody_add(text):
res = text

Expand All @@ -201,13 +185,13 @@ def _prosody_add(text):
# hamza mamduuda --> alif-hamza + fatha + alif + sukuun
res = re.sub(u"\u0622", u"\u0623\u064E\u0627\u0652", res)

res = re.sub(u"([\u064E-\u0650])$", lambda m: tatweel[m.group(1)], res)
res = re.sub(u"([\u064E-\u0650])$", lambda m: const.TATWEEL[m.group(1)], res)

return res

def _prosody_change(text):
    """Apply ``change.modify`` to every word of *text*.

    Each maximal run of non-whitespace characters is passed to
    ``change.modify`` and replaced by its return value; the whitespace
    between words is left untouched.
    """
    # "\\s" rather than "\s": the latter is an invalid escape sequence in a
    # non-raw string literal, and the rest of this module spells it "\\s".
    return re.sub(u"([^\\s]+)", lambda m: change.modify(m.group(1)), text)

# TODO: handle these
Expand Down Expand Up @@ -247,9 +231,9 @@ def prosody_form(text):
"""
res = text
res = _prosody_del(text)
res = _prosody_add(res)
res = _prosody_change(res)
res = _prosody_del(res)
res = _prosody_add(res)
return res


Expand Down
20 changes: 8 additions & 12 deletions tests/meter_u.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from aruudy.poetry import meter
from aruudy.poetry.meter import Bahr, BahrError
from aruudy.poetry.meter import Bahr


def test_name_type():
Expand All @@ -41,21 +41,17 @@ def test_get_bahr():
assert meter.get_bahr("aaa") == None

def test_get_names():
assert meter.arabic_names()[0] == u"طويل"
assert meter.english_names()[0] == "long"
assert meter.trans_names()[0] == u"ṭawīl"
assert meter.get_names("arabic")[0] == u"طويل"
assert meter.get_names("english")[0] == "long"
assert meter.get_names("trans")[0] == u"ṭawīl"
assert meter.get_names()[0]["trans"] == u"ṭawīl"

def test_bahr():
b = meter.get_bahr("overtaking", dic=False)

assert b.test_property("name", u"mutadārik", "trans")
assert not b.test_property("name", u"kamil", "trans")
with pytest.raises(BahrError):
b.test_property("transliterate", u"kamil")

assert b.get_value("name", "trans") == u"mutadārik"
with pytest.raises(BahrError):
b.get_value("transliterate")
assert b.test_name("trans", u"mutadārik")
assert not b.test_name("trans", u"kamil")
#with pytest.raises(BahrError):
# b.test_property("transliterate", u"kamil")

assert b.to_dict() == meter.mutadaarik.to_dict()

0 comments on commit 06d8fb1

Please sign in to comment.