-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpalindrome.py
131 lines (109 loc) · 2.96 KB
/
palindrome.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
from collections import defaultdict
import datrie
words = set()
past_shenase = [
'م',
'ی',
'',
'یم',
'ید',
'ند',
]
present_shenase = [
'م',
'ی',
'د',
'یم',
'ید',
'ند',
]
zamir = [
'م',
'ش',
'ت',
'مان',
'تان',
'شان',
]
short_verb = [
'م',
'ی',
'ه',
'یم',
'ید',
'ند',
]
persian_chars = set()
with open("verbs.txt") as f:
for line in f:
past, present = line.split("#")
if past:
for s in past_shenase:
words.add(past.strip() + s)
if present:
for s in present_shenase:
words.add(present.strip() + s)
with open("words.txt") as f:
for line in f:
word = line.split()[0].strip()
persian_chars.update(set(word))
words.add(word)
# for v in short_verb:
# words.add(word + v)
# for z in zamir:
# words.add(word + z)
# Ignore the difference of some similiar Farsi characters
trans_table = str.maketrans("صثطحذظضغئآ", "سستهزززقعا", "")
def normalized(string):
"""
:type string: str
"""
return string.translate(trans_table)
translated_words = defaultdict(set)
for w in words:
translated_words[normalized(w)].add(w)
# Triple word palindromes
trie = datrie.Trie(persian_chars)
rtrie = datrie.Trie(persian_chars)
for w in words:
trie[normalized(w)] = True
rtrie[normalized(w[::-1])] = True
palindromes = set()
cnt = 0
with open('output_triple.txt', 'w') as f:
for w1 in sorted(words):
cnt += 1
for w3 in rtrie.prefixes(w1):
if len(w1) == len(w3):
continue
w3 = w3[::-1]
w2_suf = w1[-(len(w1) - len(w3)):]
for w2 in rtrie.keys(w2_suf):
w2 = w2[::-1]
w2_pref = w2[:-len(w2_suf)]
if w2_pref == w2_pref[::-1]:
for rw1 in translated_words[w1]:
for rw2 in translated_words[w2]:
for rw3 in translated_words[w3]:
p = ' '.join([rw1, rw2, rw3])
f.write(p + '\n')
if not cnt % 79 or cnt == len(words):
print('\r' + str(cnt) + "\t / " + str(len(words)), end='')
# Double word palindromes
palindromes = set()
for w in sorted(words):
for ww in translated_words[normalized(w[::-1])]:
palindromes.add(w + ' ' + ww)
for ww in translated_words[normalized(w[::-1][1:])]:
palindromes.add(w + ' ' + ww)
for ww in translated_words[normalized(w[1:][::-1])]:
palindromes.add(ww + ' ' + w)
with open('output_double.txt', 'w') as f:
for p in sorted(palindromes):
f.write(p + "\n")
# Single word palindromes
with open('output_single.txt', 'w') as f:
for w in sorted(words):
if normalized(w) == normalized(w[::-1]):
f.write(w + '\n')