-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathValenceAssigner.py
executable file
·113 lines (93 loc) · 3.17 KB
/
ValenceAssigner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
Mihai Tabara @ 2013
Emilia Ciobanu @ 2013
"""
import csv
from StringIO import StringIO
from ValenceTextHandler import ValenceTextHandler
class AnewValences:
    """
    Word -> valence lookup loaded from a CSV file.

    The CSV must have a header row containing at least the columns
    'Description' (the word) and 'Valence Mean' (parsed as a float).

    * [input] - path of the CSV file
    * [output] - ``valences``: dict mapping each 'Description' to its
      'Valence Mean'
    """

    def __init__(self, file_input):
        self._valences = {}
        self.file_input = file_input
        # set by handle(); lets the ``valences`` property skip re-reading
        self._loaded = False

    def handle(self):
        """
        Read the CSV file and merge its rows into the valence dict.

        May be called again explicitly to pick up changes in the file; the
        dict is updated in place, so references handed out earlier stay valid.

        Raises IOError/OSError if the file cannot be opened, KeyError if a
        required column is missing and ValueError if a 'Valence Mean' cell
        is not a number.
        """
        # The csv module wants a binary-mode file where str is bytes
        # (Python 2) and a text-mode file opened with newline='' otherwise.
        if str is bytes:
            fin = open(self.file_input, 'rb')
        else:
            fin = open(self.file_input, 'r', newline='')
        with fin:
            for row in csv.DictReader(fin):
                self._valences[row['Description']] = float(row['Valence Mean'])
        self._loaded = True

    @property
    def valences(self):
        """Dict of word -> valence; the file is parsed on first access only."""
        if not self._loaded:
            self.handle()
        return self._valences
class ValenceAssigner:
    """
    ValenceAssigner class is the class to assign a general valence to a text.

    * [input] - path handed to ValenceTextHandler, whose ``words`` attribute
      supplies the words of the text (assumed to be a list of hashable
      words -- TODO confirm against ValenceTextHandler)
    * [output] - ``text_valence``: total valence of the text;
      ``unrated``: distinct words of the text that have no rating
    """

    def __init__(self, file_input, anew_path="scripts/male.csv"):
        """
        file_input -- passed unchanged to ValenceTextHandler
        anew_path  -- CSV file read by AnewValences; defaults to the path
                      that used to be hard-coded here
        """
        self.helper = ValenceTextHandler(file_input)
        self.input_words = self.helper.words
        self.anew = AnewValences(anew_path)
        self.anew_valences = self.anew.valences
        # default neutral psychological valence of 4.5 in a scale of 1-9
        self._text_valence = float(4.5)
        # list with words to be added to anew from this text
        self._unrated = []

    def _handle_valences(self):
        """
        Set ``_text_valence`` to the frequency-weighted mean valence of the
        rated words that occur in the text.

        When no rated word occurs, ``_text_valence`` is left untouched (it
        keeps the default assigned in __init__).
        """
        # Local import: the file's import header does not provide Counter.
        from collections import Counter

        # Tally the text once instead of scanning it once per rated word.
        occurrences = Counter(self.input_words)
        up = 0.0
        down = 0.0
        for key, value in self.anew_valences.items():
            freq = occurrences[key]  # 0 for words absent from the text
            up += value * freq
            down += freq
        if down:
            self._text_valence = up / down

    def _handle_unrated(self):
        """Collect the distinct words of the text that have no rating."""
        rated = self.anew_valences  # dict membership test, no key-list scan
        self._unrated = list(set(
            word for word in self.input_words if word not in rated))

    @property
    def text_valence(self):
        """Valence of the text, recomputed on every access."""
        self._handle_valences()
        return self._text_valence

    @property
    def unrated(self):
        """Distinct unrated words (in no particular order), recomputed on
        every access."""
        self._handle_unrated()
        return self._unrated
def generate_csv(header, rows):
    """
    Render a header and data rows as CSV text.

    header -- sequence of column titles, written as the first line
    rows   -- iterable of sequences, one per data line

    Returns the complete CSV document as one string (lines end with the csv
    module's default terminator, '\\r\\n').

    Cells are handed to the csv writer as they are, except that unicode
    objects are encoded to UTF-8 when str is bytes (Python 2), where the csv
    module cannot write them. Byte strings and non-string cells are no
    longer passed through ``.encode``, which used to raise on them.
    """
    text_type = type(u'')
    # True only where str is the bytes type, i.e. on Python 2.
    encode_text = str is not text_type

    def to_cell(value):
        # Encode only real unicode objects; everything else is left to csv.
        if encode_text and isinstance(value, text_type):
            return value.encode('utf-8')
        return value

    output = StringIO()
    csv_writer = csv.writer(output)
    csv_writer.writerow([to_cell(value) for value in header])
    for item in rows:
        csv_writer.writerow([to_cell(value) for value in item])
    return output.getvalue()
if __name__ == "__main__":
    # Column titles of the generated file.
    header = ['Description', 'Valence Mean', 'Arousal Mean',
              'Dominance Mean', 'People Counter']

    # Story files to scan, each looked up under "stories/".
    stories = [
        "aladdin_and_the_wonder_lamp",
        "beauty_and_the_beast",
        "cinderlla",
        "hansel_and_gretel",
        "history_of_jack_and_giant_killer",
        "rapunzel",
        "red_riding_hood",
        "sleeping_beauty",
        "snow_white",
        "the_frog_prince",
        "tom_thumb",
    ]

    # Pool the unrated words of every story, then drop duplicates.
    pooled = []
    for story_name in stories:
        pooled += ValenceAssigner("stories/%s" % story_name).unrated
    distinct_words = list(set(pooled))

    # One row per word: neutral 4.5 for each of the three means and a
    # people counter of 1.
    columns = [[word, '4.5', '4.5', '4.5', '1'] for word in distinct_words]

    # Render first, so the output file is only opened once the CSV text exists.
    answer = generate_csv(header, columns)
    with open("unrated.csv", "w") as fo:
        fo.write(answer)