forked from cltl/OpenDutchWordnet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlemma.py
103 lines (75 loc) · 2.61 KB
/
lemma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from collections import defaultdict
class Lemma():
    '''
    methods to access and manipulate the resource, lemma based

    NOTE(review): every method relies on self.les_get_generator(), which is
    not defined in this class; presumably it is supplied by the class this
    one is combined with -- confirm against the caller.
    '''

    def __init__(self):
        pass

    def lemmas_generator(self, pos=None):
        '''
        count, per lemma, the entries yielded by self.les_get_generator()

        Despite its name this method does not yield: it builds and
        returns the complete mapping.

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @rtype: collections.defaultdict
        @return: mapping lemma -> number of entries with that lemma
        (only entries whose part of speech equals pos are counted
        when pos is given)
        '''
        lemmas = defaultdict(int)
        for le_obj in self.les_get_generator():
            lemma = le_obj.get_lemma()
            # a falsy pos (None, '') disables filtering; get_pos() is
            # only consulted when a filter was actually requested
            if not pos or pos == le_obj.get_pos():
                lemmas[lemma] += 1
        return lemmas

    def lemma_get_generator(self, lemma, pos=None):
        '''
        return list of Le class instances of a lemma

        Despite its name this method does not yield: it returns a list,
        which lemma_num_senses depends on to call len().

        @type  lemma: str
        @param lemma: lemma

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @rtype: list
        @return: list of Le class instances, in the order in which
        self.les_get_generator() produced them
        '''
        return [le_obj
                for le_obj in self.les_get_generator()
                if le_obj.get_lemma() == lemma
                and (not pos or pos == le_obj.get_pos())]

    def lemma_num_senses(self, lemma, pos=None):
        '''
        return number of senses

        @type  lemma: str
        @param lemma: lemma

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @rtype: int
        @return: number of senses (number of matching Le class instances)
        '''
        return len(self.lemma_get_generator(lemma, pos))

    def lemma_highest_sense_number(self, lemma, pos=None):
        '''
        return highest sense number of le instances of lemma

        @type  lemma: str
        @param lemma: lemma

        @type  pos: str
        @param pos: noun | verb.
        Default is None, then no filtering is performed.

        @rtype: int
        @return: highest sense number, 0 if no le instance matches

        @raise ValueError: if a sense number cannot be converted by int()
        '''
        # start at 0 so that an unknown lemma yields 0
        highest = 0
        for le_obj in self.lemma_get_generator(lemma, pos):
            highest = max(highest, int(le_obj.get_sense_number()))
        return highest