-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtrain.py
118 lines (94 loc) · 3.54 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import torch
import torch.nn as nn
def conv_block(in_channels, out_channels):
    """Build one encoder stage: 3x3 conv -> batch norm -> ReLU -> 2x2 max-pool.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count produced by the convolution and
            normalised by the batch-norm layer.

    Returns:
        An ``nn.Sequential`` containing the four layers in that order.
    """
    # track_running_stats=False: no running mean/var buffers are kept, so the
    # layer relies on batch statistics.
    # NOTE: the original author left a disabled ``nn.init.uniform_`` call on the
    # batch-norm weight, marked "for pytorch 1.2 or later"; it stays disabled.
    norm = nn.BatchNorm2d(out_channels, momentum=0.01, track_running_stats=False)
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    layers = [conv, norm, nn.ReLU(), nn.MaxPool2d(kernel_size=2)]
    return nn.Sequential(*layers)
class Convnet(nn.Module):
    """Four-stage convolutional encoder followed by a linear embedding layer.

    Args:
        x_dim: Number of input channels.
        hid_dim: Number of channels used by every convolutional stage.
        z_dim: Size of the embedding vector produced for each sample.
    """

    def __init__(self, x_dim=3, hid_dim=64, z_dim=64):
        super().__init__()
        # The first stage maps x_dim -> hid_dim; the other three keep hid_dim.
        widths = [x_dim, hid_dim, hid_dim, hid_dim, hid_dim]
        stages = [
            conv_block(c_in, c_out)
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ]
        self.encoder = nn.Sequential(*stages)
        # The linear layer is sized for hid_dim * 3 * 3 flattened features, so
        # the encoder output must flatten to exactly that many values.
        self.embeddings = nn.Linear(hid_dim * 3 * 3, z_dim)

    def forward(self, x):
        """Encode ``x`` and return one ``z_dim``-sized embedding per sample."""
        features = self.encoder(x)
        # Flatten everything except the leading (batch) dimension.
        flat = features.view(features.size(0), -1)
        # NOTE: a Softplus on the output was disabled by the original author.
        return self.embeddings(flat)
def count_acc(probs, label):
    """Return the fraction of entries whose thresholded prediction equals ``label``.

    Args:
        probs: Tensor of scores; an entry counts as a positive prediction
            only when it is strictly greater than 0.5.
        label: Tensor (or value) compared element-wise with the predictions.

    Returns:
        The mean agreement as a Python float.
    """
    predicted = probs > 0.5
    hits = predicted == label
    # Cast the boolean matches to a CPU float tensor before averaging.
    as_float = hits.type(torch.FloatTensor)
    return as_float.mean().item()
class Averager:
    """Running arithmetic mean of the values passed to :meth:`add`.

    Attributes are kept public, as in the original:
    ``n`` is the number of values seen and ``v`` is their current mean.
    """

    def __init__(self):
        self.n = 0
        self.v = 0

    def add(self, x):
        """Fold ``x`` into the running mean."""
        seen = self.n
        # Rebuild the previous total from mean * count, then add the new value.
        total = self.v * seen + x
        self.v = total / (seen + 1)
        self.n = seen + 1

    def item(self):
        """Return the current mean (``0`` if nothing has been added)."""
        return self.v
def train(task_generator, forward_fn, config, model=None, xor_task=None, loss_fn=None):
    """Train an embedding model on shot/query tasks drawn from a generator.

    Each epoch performs a single optimisation step: a shot/query batch is
    requested from the generator, both sets are embedded by the model,
    ``forward_fn`` turns the embeddings into query predictions, and the loss
    against the query labels is back-propagated.

    Args:
        task_generator: Callable invoked as ``task_generator(config=config)``.
            Its result must expose ``input_shape`` and ``get_shot_query``.
        forward_fn: Callable invoked as
            ``forward_fn(shot_emb, query_emb, label_shot, config=config)``;
            its result is fed to the loss and to ``count_acc``.
        config: Settings object. This function reads ``max_epoch``,
            ``prob_xor``, ``verbose`` and, when it builds the default model,
            ``out_dim``.
        model: Network to train. When ``None`` a fresh ``Convnet`` is built.
        xor_task: Passed through unchanged to ``get_shot_query``.
        loss_fn: Callable ``loss_fn(probs, label_query)``. Defaults to
            ``nn.BCELoss()``.

    Returns:
        A tuple ``(model, generator)`` with the trained model and the task
        generator instance created here.
    """
    # Prepare data
    generator = task_generator(config=config)
    # The last axis of the generator's input shape is used as the number of
    # input channels of the default model.
    x_dim = generator.input_shape[-1]

    # Set up model
    if model is None:
        model = Convnet(x_dim=x_dim, z_dim=config.out_dim)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Train
    criterion = nn.BCELoss() if loss_fn is None else loss_fn
    epochs = range(1, config.max_epoch + 1)
    for epoch in epochs:
        model.train()
        optimizer.zero_grad()

        # Draw one batch of support ("shot") and query samples.
        batch = generator.get_shot_query(
            config, device, xor_task=xor_task, prob_xor=config.prob_xor
        )
        data_shot, label_shot, data_query, label_query = batch

        # Embed both sets, then let forward_fn produce the query predictions.
        shot_emb = model(data_shot)
        query_emb = model(data_query)
        probs = forward_fn(shot_emb, query_emb, label_shot, config=config)

        # Loss, accuracy and a single optimiser step.
        loss = criterion(probs, label_query)
        acc = count_acc(probs, label_query)
        loss.backward()
        optimizer.step()

        if config.verbose:
            print('epoch {}, loss={:.4f} acc={:.4f}'.format(epoch, loss.item(), acc))

    return model, generator
def validate(task_generator, forward_fn, config, model=None, xor_task=None, loss_fn=None):
    """Evaluate ``model`` on ``config.nb_val_tasks`` validation tasks.

    For every task a shot/query batch is requested from the generator with
    ``validation=True``, both sets are embedded by the model, ``forward_fn``
    produces the query predictions, and loss and accuracy are recorded.
    Everything runs under ``torch.no_grad()`` with the model in eval mode
    (the model is left in eval mode on return).

    Args:
        task_generator: Callable invoked as ``task_generator(config=config)``.
            Its result must expose ``get_shot_query``.
        forward_fn: Callable invoked as
            ``forward_fn(shot_emb, query_emb, label_shot, config=config)``.
        config: Settings object; ``nb_val_tasks`` is read here.
        model: Network to evaluate. Required despite the ``None`` default,
            which is kept only so the signature stays parallel to ``train``.
        xor_task: Passed through unchanged to ``get_shot_query``.
        loss_fn: Callable ``loss_fn(probs, label_query)``. Defaults to
            ``nn.BCELoss()``.

    Returns:
        A tuple ``(accs, losses)`` of two lists with one entry per task.
        ``accs`` holds Python floats from ``count_acc``; ``losses`` holds the
        raw return values of ``loss_fn`` (not converted to floats).

    Raises:
        ValueError: If ``model`` is ``None``.
    """
    # Fail early with a clear message instead of an AttributeError on
    # ``None.eval()`` after the generator has already been constructed.
    if model is None:
        raise ValueError("validate() requires a model; got model=None")

    # Prepare data
    g = task_generator(config=config)
    # NOTE(review): unlike train(), the model is not moved to ``device`` here;
    # presumably callers pass a model that already lives on it - confirm.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Validate
    model.eval()
    if loss_fn is None:
        loss_fn = nn.BCELoss()

    accs = []
    losses = []
    with torch.no_grad():
        for _ in range(config.nb_val_tasks):
            # Get one validation batch of shot and query samples.
            data_shot, label_shot, data_query, label_query = g.get_shot_query(
                config, device, validation=True, xor_task=xor_task
            )

            # Compute predictions
            shot_emb = model(data_shot)
            query_emb = model(data_query)
            probs = forward_fn(shot_emb, query_emb, label_shot, config=config)

            # Compute loss and accuracy for this task.
            loss = loss_fn(probs, label_query)
            acc = count_acc(probs, label_query)
            losses.append(loss)
            accs.append(acc)

    return accs, losses