-
Notifications
You must be signed in to change notification settings - Fork 1
/
fm1.py
176 lines (131 loc) · 5.26 KB
/
fm1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import pylast
import cPickle
import sys
import ConfigParser
def getFromUserDOM(dom, tag):
    """Return the text of the first ``tag`` element below the DOM's root.

    dom -- a DOM document; its first child is searched for ``tag`` elements.
    tag -- element name to look up (e.g. "age", "country", "gender").

    Returns the ``data`` of the element's first child node, or 0 when the
    element is missing or has no children. Callers compare against 0 to
    detect "no value".
    """
    matches = dom.firstChild.getElementsByTagName(tag)
    # A profile without this element used to raise IndexError; treat it
    # the same way as an element that is present but empty.
    if not matches:
        return 0
    text = matches[0].firstChild
    if text is None:
        return 0
    return text.data
def getArtistFromName(name):
    """Build a pylast.Artist for ``name`` bound to the module-level network."""
    return pylast.Artist(name, network)
# Load the API credentials and the Last.fm account from settings.ini.
# Expected layout: an [API] section with API_KEY and API_SECRET, and a
# [USER] section with username and password.
cfg = ConfigParser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed; fail early with a clear message instead
# of a NoSectionError on the first cfg.get() below.
if not cfg.read('settings.ini'):
    sys.exit("could not read 'settings.ini'")
API_KEY = cfg.get('API', 'API_KEY')
API_SECRET = cfg.get('API', 'API_SECRET')
username = cfg.get('USER', 'username')
# The password is passed on only as the hash produced by pylast.md5.
password_hash = pylast.md5(cfg.get('USER', 'password'))
network = pylast.get_lastfm_network(
    api_key=API_KEY,
    api_secret=API_SECRET,
    username=username,
    password_hash=password_hash,
)
# guess & print the estimated age of the configured user (the 'username' entry in settings.ini, e.g. 'preflex')
# formula:
# guessedAge = sum_i( playcount_i * weightedAverageAge_i ) / sum_i( playcount_i )
# with
# weightedAverageAge_i = sum_j( age_i_j * weight_i_j ) / sum_j( weight_i_j )
# playcount_i ...... playcount for user for artist i
# age_i_j .......... age of top fan j of artist i
# weight_i_j ....... last.fm's weight attributed to fan j with regard to artist i
me = network.get_user(username)

### testing
#print getArtistFromName(u'Opeth'.encode('latin-1')).get_name()
#print getArtistFromName(u'Sigur R\xf3s'.encode('latin-1')).get_name()

# Number of favourite artists taken into account.
NUM_ARTISTS = 5
fromLib = False

# Build mytopartists: a list of (artist object, play count) pairs.
if len(sys.argv) < 2:
    # No command-line argument: ask Last.fm for the user's top artists.
    topartists = me.get_top_artists()[0:NUM_ARTISTS]
    mytopartists = [(entry['item'], int(entry['weight']))
                    for entry in topartists]
else:
    # Any command-line argument: read the charts from the local 'mycharts'
    # pickle, a sequence of (artist name, value) pairs.
    # NOTE(review): unpickling can execute arbitrary code; only use a
    # 'mycharts' file that you created yourself.
    theFile = open('mycharts', 'r')
    try:
        topartists = cPickle.load(theFile)
    finally:
        # The handle used to be left open for the rest of the run.
        theFile.close()
    mytopartists = [(getArtistFromName(artist_name), value)
                    for artist_name, value in topartists[0:NUM_ARTISTS]]
    fromLib = True
# Accumulators over all of the user's top artists.
sum_playcount = 0            # total play count of the artists considered
sum_playcountavgage = 0      # sum of playcount * weighted average fan age
sum_playcountavggender = 0   # sum of playcount * weighted female share
# countries maps artist -> [playcount, [(country, fan weight), ...]];
# countryset collects every country seen among the top fans.
countries = {}
countryset = set()

## sum ( pcA * sum (genAF * weightAF) / totalWeightA ) ) / totalPCA
for artist_weight in mytopartists:
    artist = artist_weight[0]
    weight = int(artist_weight[1])
    # 'replace' keeps names outside Latin-1 from aborting the whole run
    # with a UnicodeEncodeError.
    print(artist.get_name().encode('latin-1', 'replace'))
#   print "%s x %d" %(artist.get_name().encode('latin-1'), weight)
    fans = artist.get_top_fans()

    sum_ageweight = 0    # sum of age * fan weight, fans with a known age
    sum_weight = 0       # sum of fan weights, fans with a known age
    sum_genweight = 0    # sum of fan weights of fans whose gender is 'f'
    sum_weight_gen = 0   # sum of fan weights, fans with a known gender
    countries[artist] = [weight, []]

    for fan_weight in fans:
        fan = fan_weight['item']
        fweight = fan_weight['weight']
        # One profile lookup per fan; getFromUserDOM yields 0 for fields
        # the profile does not provide.
        info = fan.get_info()
        age = int(getFromUserDOM(info, "age"))
        country = getFromUserDOM(info, "country")
        gender = getFromUserDOM(info, "gender")

        if country != 0:
            countries[artist][1].append((country, fweight))
            countryset.add(country)
        if age != 0:
            sum_ageweight += age * fweight
            sum_weight += fweight
        if gender != 0:
            # Female fans count as 1, every other known gender as 0.
            if gender == 'f':
                sum_genweight += fweight
            sum_weight_gen += fweight

    # Avoid dividing by zero when no fan supplied the field.
    if sum_weight == 0:
        sum_weight = 1
    if sum_weight_gen == 0:
        sum_weight_gen = 1

    # Float division, as for gender: with integer weights, Python 2's "/"
    # would truncate each artist's average before it is weighted by the
    # play count, biasing the final estimate downwards.
    avg_age_weighted = float(sum_ageweight) / float(sum_weight)
    print("weighted avg age: %d" % (avg_age_weighted))
    avg_gen_weighted = float(sum_genweight) / float(sum_weight_gen)
    print("weighted avg gender: %f" % (avg_gen_weighted))

    sum_playcount += weight
    sum_playcountavgage += weight * avg_age_weighted
    sum_playcountavggender += weight * avg_gen_weighted

# Same zero guard for the overall totals (e.g. no artists at all).
if sum_playcount == 0:
    sum_playcount = 1

my_inferred_age = float(sum_playcountavgage) / float(sum_playcount)
print("i guess you are %d years old" % (my_inferred_age))
my_inferred_gender = float(sum_playcountavggender) / float(sum_playcount)
print("%f%% chance that you are female" % (my_inferred_gender * 100))
## now look at all countries that my fav artists' top fans are from
## for each country, sum the fan weights of top fans of every artist and weight it by the relative importance of that artist among my favourites
## take the sum of those weights and rank the countries by their scores
countryscores = []
for country in countryset:
    score = 0
    for artist in countries:
        playcount_list = countries[artist]
        playcount = playcount_list[0]
        clist = playcount_list[1]
        # Share of this artist in the user's total play count.
        factor = float(int(playcount)) / float(sum_playcount)
        # Total weight of this artist's top fans who are from `country`.
        sum_country_weights = sum(fan_weight
                                  for fan_country, fan_weight in clist
                                  if fan_country == country)
        score += factor * sum_country_weights
    countryscores.append((country, score))

sortedscores = sorted(countryscores, key=lambda c_s: c_s[1], reverse=True)

## calculate the probability that user is from a particular country, for the top five countries
totalscores = sum(c_s[1] for c_s in sortedscores)
# Same guard the script uses for its other denominators: if every score is
# zero, report 0% instead of raising ZeroDivisionError.
if totalscores == 0:
    totalscores = 1
for item in sortedscores[0:5]:
    relative = float(item[1]) / float(totalscores)
    print("chance of you being from %s: %f%%" % (item[0].encode('latin-1'), relative * 100))