-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_perceptron.py
88 lines (77 loc) · 3 KB
/
_perceptron.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Averaged perceptron classifier. Implementation geared for simplicity rather than
efficiency.
"""
from collections import defaultdict
import pickle
import random
class Perceptron(object):
def __init__(self):
# Each feature gets its own weight vector, so weights is a dict-of-dicts
self.weights = {}
self.classes = set()
# The accumulated values, for the averaging. These will be keyed by
# feature/clas tuples
self._totals = defaultdict(int)
# The last time the feature was changed, for the averaging. Also
# keyed by feature/clas tuples
# (tstamps is short for timestamps)
self._tstamps = defaultdict(int)
# Number of instances seen
self.i = 0
def predict(self, features):
'''Dot-product the features and current weights and return the best class.'''
scores = self.score(features)
# Do a secondary alphabetic sort, for stability
return max(self.classes, key=lambda clas: (scores[clas], clas))
def score(self, features):
scores = {0: 0, 1: 0, 2: 0, 3: 0}
all_weights = self.weights
for feat, value in features.items():
if value == 0:
continue
if feat not in all_weights:
continue
weights = all_weights[feat]
for clas, weight in weights.items():
scores[clas] += value * weight
return scores
def update(self, truth, guess, features):
def upd_feat(c, f, w, v):
param = (f, c)
self._totals[param] += (self.i - self._tstamps[param]) * w
self._tstamps[param] = self.i
self.weights[f][c] = w + v
self.i += 1
if truth == guess:
return None
for f in features:
weights = self.weights.setdefault(f, {})
upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
def average_weights(self):
for feat, weights in self.weights.items():
new_feat_weights = {}
for clas, weight in weights.items():
param = (feat, clas)
total = self._totals[param]
total += (self.i - self._tstamps[param]) * weight
averaged = round(total / float(self.i), 3)
if averaged:
new_feat_weights[clas] = averaged
self.weights[feat] = new_feat_weights
def save(self, path):
pickle.dump(dict(self.weights), open(path, 'w'))
def load(self, path):
self.weights = pickle.load(open(path))
def train(nr_iter, examples):
model = Perceptron()
for i in range(nr_iter):
random.shuffle(examples)
for features, class_ in examples:
scores = model.predict(features)
guess, score = max(scores.items(), key=lambda i: i[1])
if guess != class_:
model.update(class_, guess, features)
model.average_weights()
return model