#!/usr/bin/env python
# The following code was written by Christopher Potts and Percy Liang. We
# adjusted `evaluate` and a few other spots; our changes are marked by
# comments in the affected functions.
"""
Defines the core learning framework.
The framework defined by `score`, `predict`, and `SGD` is defined in
section 3.2 of the paper. See `evenodd.py` for a simple example
(corresponding to table 3).
This core framework is also all that is needed for simple semantic
parsing: section 4.1 of the paper and `evaluate_semparse` in
`synthesis.py`.
For learning from denotations (section 4.2 of the paper), the
framework is defined by `score`, `predict`, and `LatentSGD`. See
`evaluate_interpretive` in `synthesis.py`.
We don't cover this in the paper, but `score`, `predict`, and
`LatentSGD` can also be used for semantic parsing where the full tree
structure of the logical form is hidden, and only the root node
logical expression is available for training. See
`evaluate_latent_semparse` in `synthesis.py`.
The function `evaluate` below provides a generic interface for showing
basic results for train/test sets.
"""
__author__ = "Christopher Potts and Percy Liang"
__credits__ = []
__license__ = "GNU general public license, version 2"
__version__ = "2.0"
__maintainer__ = "Christopher Potts"
__email__ = "See the authors' websites"

import re
import random
from collections import defaultdict
from operator import itemgetter
from itertools import product

def score(x=None, y=None, phi=None, w=None):
    """Calculates the inner product w * phi(x, y)."""
    return sum(w[f] * count for f, count in phi(x, y).items())
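
# For example (illustrative values only, not from the original code): if
# phi(x, y) returned defaultdict(float, {"f1": 2.0, "f2": 1.0}) and w were
# defaultdict(float, {"f1": 0.5, "f2": -1.0}), then score(x, y, phi, w)
# would be 2.0 * 0.5 + 1.0 * (-1.0) == 0.0.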

def predict(x=None, w=None, phi=None, classes=None, output_transform=(lambda x: x)):
    """Returns a highest-scoring candidate in `classes(x)` under the weights
    `w`, chosen uniformly at random among ties and passed through
    `output_transform`."""
    scores = [(score(x, y_prime, phi, w), y_prime) for y_prime in classes(x)]
    # Get the maximal score (compare scores only, so ties do not fall back to
    # comparing the candidate objects themselves):
    max_score = max(s for s, y_prime in scores)
    # Get all the candidates with the max score and choose one randomly:
    y_hats = [y_alt for s, y_alt in scores if s == max_score]
    return output_transform(random.choice(y_hats))

######################################################################
# Note: SGD and LatentSGD can be seen as differing only in how they
# choose the hidden variable y: for SGD, it is the same as the output
# seen in the training data, whereas LatentSGD chooses it as the
# highest scoring hidden variable. Thus, SGD and LatentSGD could be
# stated as abstractions of a single function, call it GenericSGD,
# differing only in the function used to choose this value: an
# identity function for SGD and the best prediction for LatentSGD (see
# the first line of the loop through the training data). We have not
# combined them here in order to keep the code readable, but combining
# could help bring out this insight (and make for more maintainable
# code); an illustrative sketch of such a GenericSGD follows this block.
######################################################################
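
# The sketch below is our addition (it is not in Potts and Liang's original
# file) and is purely illustrative: `choose_y` is the identity for plain SGD,
# and for LatentSGD it would be a call to `predict` restricted to candidates
# compatible with the observed denotation. It mirrors the update loop used by
# `SGD` and `LatentSGD` below.
def GenericSGD(D=None, phi=None, classes=None, T=10, eta=0.1,
               choose_y=(lambda x, y, w: y)):
    w = defaultdict(float)
    for t in range(T):
        random.shuffle(D)
        for x, y_obs in D:
            # Identity for SGD; best compatible prediction for LatentSGD:
            y = choose_y(x, y_obs, w)
            # Cost-augmented scores over the candidate set GEN(x):
            scores = [(score(x, y_alt, phi, w) + cost(y, y_alt), y_alt)
                      for y_alt in classes(x)]
            max_score = max(s for s, y_alt in scores)
            y_tilde = random.choice([y_alt for s, y_alt in scores if s == max_score])
            # Perceptron-style update toward the target, away from the prediction:
            actual_rep = phi(x, y)
            predicted_rep = phi(x, y_tilde)
            for f in set(actual_rep) | set(predicted_rep):
                w[f] += eta * (actual_rep[f] - predicted_rep[f])
    return w
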
def SGD(D=None, phi=None, classes=None, true_or_false=None, T=10, eta=0.1, output_transform=None):
    """Implements stochastic (sub)gradient descent, as in the paper.
    In this adjusted version, `classes` is a fixed collection of candidate
    outputs that is iterated over directly (in the original, `classes` is a
    function GEN of the input `x`). `true_or_false` is accepted only for
    interface compatibility with `evaluate` and is not used here."""
    w = defaultdict(float)
    for t in range(T):
        random.shuffle(D)
        for x, y in D:
            # Get all (score, y') pairs. The two commented-out lines below are
            # the original version, in which `classes` is a function of `x`:
            #scores = [(score(x, y_alt, phi, w)+cost(y, y_alt), y_alt)
            #for y_alt in classes(x)]
            scores = {y_alt: score(x, y_alt, phi, w) + cost(y, y_alt) for y_alt in classes}
            # Get the maximal score:
            max_score = max(scores.values())
            # Get all the candidates with the max score and choose one randomly:
            y_tildes = [y_alt for y_alt in scores if scores[y_alt] == max_score]
            y_tilde = random.choice(y_tildes)
            # Weight-update (a bit cumbersome because of the dict-based implementation):
            actual_rep = phi(x, y)
            predicted_rep = phi(x, y_tilde)
            for f in set(actual_rep) | set(predicted_rep):
                w[f] += eta * (actual_rep[f] - predicted_rep[f])
    return w
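
# Example call of the adjusted SGD (the names below are illustrative, not from
# this project): unlike the original version, `classes` is a fixed list of
# candidate outputs rather than a function GEN(x), e.g.
#     w = SGD(D=training_pairs, phi=my_phi, classes=candidate_outputs, T=10, eta=0.1)
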
def LatentSGD(D=None, phi=None, classes=None, true_or_false=None, T=10, eta=0.1, output_transform=None):
    """Implements stochastic (sub)gradient descent for the latent SVM
    objective, as in the paper. `classes` is defined as GEN(x, d) for
    each input `x`. `true_or_false` is accepted only so that `evaluate`
    can pass it, as it does for `SGD`; it is not used here."""
w = defaultdict(float)
for t in range(T):
random.shuffle(D)
for x, d in D:
# Get the best viable candidate given the current weights:
y = predict(
x,
w,
phi=phi,
classes=(lambda z : [zd for zd in classes(z) if output_transform(zd) == d]))
# Get all (score, y') pairs:
scores = [(score(x, y_alt, phi, w)+cost(y, y_alt), y_alt)
for y_alt in classes(x)]
            # Get the maximal score (compare scores only, so ties do not fall
            # back to comparing the candidate objects themselves):
            max_score = max(s for s, y_alt in scores)
            # Get all the candidates with the max score and choose one randomly:
            y_tildes = [y_alt for s, y_alt in scores if s == max_score]
y_tilde = random.choice(y_tildes)
# Weight-update:
actual_rep = phi(x, y)
predicted_rep = phi(x, y_tilde)
            for f in set(actual_rep) | set(predicted_rep):
w[f] += eta * (actual_rep[f] - predicted_rep[f])
return w

def cost(y, y_prime):
    """Cost function used by `SGD` and `LatentSGD` (both defined above):
    the fraction of `y`'s components that do not appear in `y_prime`."""
    #return 0.0 if y.components == y_prime.components else 1.0
    costs = [0.0 if x in y_prime.components else 1.0 for x in y.components]
    return sum(costs)/len(costs)
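
# For example (hypothetical outputs): if y.components were ["red", "circle"]
# and y_prime.components were ["red", "square"], then cost(y, y_prime) would
# be (0.0 + 1.0) / 2 == 0.5, i.e. half of y's components are missing from
# y_prime.
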
def evaluate(
phi=None,
optimizer=None,
train=None,
test=None,
classes=None,
true_or_false=None, # We add this argument
T=10,
eta=0.1,
output_transform=(lambda x : x)):
"""Generic interface for showing learning weights and train/test
results. optimizer should be `SGD` or `LatentSGD`, `classes` should be
a function of the inputs `x`, and `output_tranform` is used only by
models with latent variables. For examples of use, see `evenodd.py`
and `synthesis.py`."""
print("======================================================================")
print("Feature function: {}".format(phi.__name__))
w = optimizer(
D=train,
phi=phi,
T=T,
eta=eta,
classes=classes,
true_or_false=true_or_false,
output_transform=output_transform)
print("--------------------------------------------------")
print('Learned feature weights')
for f, val in sorted(list(w.items()), key=itemgetter(1), reverse=True):
print("{} {}".format(f, val))
    return w  # We do not let the trained model predict the denotations of the test sentences; instead we return the learned weights
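

# ----------------------------------------------------------------------
# Minimal usage sketch, in the spirit of the `evenodd.py` example mentioned in
# the module docstring. This block is our addition and is illustrative only:
# the `Parity` class, `parity_phi`, and the toy data are assumptions, not part
# of the original code. It trains the adjusted `SGD` via `evaluate` to label
# integers as "even" or "odd".
if __name__ == "__main__":
    class Parity:
        """Toy output whose `components` attribute is what `cost` compares."""
        def __init__(self, name):
            self.name = name
            self.components = [name]
        def __repr__(self):
            return self.name

    EVEN, ODD = Parity("even"), Parity("odd")

    def parity_phi(x, y):
        """A single indicator feature coupling the label with the input's parity."""
        return defaultdict(float, {(y.name, x % 2): 1.0})

    toy_train = [(n, EVEN if n % 2 == 0 else ODD) for n in range(20)]
    evaluate(
        phi=parity_phi,
        optimizer=SGD,
        train=toy_train,
        classes=[EVEN, ODD],  # the adjusted SGD iterates over this list directly
        T=5,
        eta=0.1)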