-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsweep.py
106 lines (90 loc) · 3.17 KB
/
sweep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import torch
import torch.optim as optim
from nvidia.dali.plugin.pytorch import DALIGenericIterator
import wandb
import tqdm as tq
import dataset
import model as model_module
import train as train_module
import utils
import model_parts
# Hyper-parameters held fixed across the sweep (only optimizer settings vary).
BATCH_SIZE = 32
EPOCHS = 75
# Dataset locations, relative to the working directory.
TRAIN_DIR = "data/train"
VAL_DIR = "data/val"
# wandb project that collects both sweeps (SGD and Adam).
PROJECT_NAME = "unet-resnet-sweep"
def build_optimizer(model, config):
    """Create an optimizer for *model* from a wandb sweep config.

    Args:
        model: network whose ``parameters()`` will be optimized.
        config: sweep config object with attributes ``optimizer``
            ("sgd" or "adam"), ``learning_rate``, and — for SGD only —
            ``momentum``.

    Returns:
        A configured ``torch.optim`` optimizer.

    Raises:
        ValueError: if ``config.optimizer`` is not a supported name.
            (The original fell through and raised a confusing
            ``UnboundLocalError`` instead.)
    """
    if config.optimizer == "sgd":
        return optim.SGD(model.parameters(),
                         lr=config.learning_rate,
                         momentum=config.momentum)
    if config.optimizer == "adam":
        return optim.Adam(model.parameters(), lr=config.learning_rate)
    raise ValueError(f"Unsupported optimizer: {config.optimizer!r}")
def build_model():
    """Instantiate the U-Net/ResNet-encoder model, compile it, and move it to GPU.

    Returns:
        The compiled model resident on the default CUDA device.
    """
    net = model_module.UNetResEncoder()
    # torch.compile wraps the module for graph-mode speedups (PyTorch 2.x).
    compiled = torch.compile(net)
    return compiled.cuda()
def build_loader(batch_size):
    """Build DALI iterators over the train and validation image directories.

    Args:
        batch_size: samples per batch for both pipelines.

    Returns:
        ``(train_loader, val_loader)`` pair of ``DALIGenericIterator``s,
        each yielding dicts keyed by 'black' and 'color'.
    """
    # Sanity-check the label files before spinning up either pipeline.
    utils.label_file_check(TRAIN_DIR)
    utils.label_file_check(VAL_DIR)

    def make_iterator(image_dir):
        # One DALI pipeline per split; output slots are named 'black'/'color'.
        pipeline = dataset.ColorDataset.dali_pipeline(image_dir=image_dir,
                                                      batch_size=batch_size,
                                                      num_threads=4)
        return DALIGenericIterator([pipeline], ['black', 'color'],
                                   reader_name='Reader')

    return make_iterator(TRAIN_DIR), make_iterator(VAL_DIR)
def train(config=None):
    """Run one sweep trial: init wandb, train for EPOCHS epochs, log losses.

    Args:
        config: optional config dict; when launched via ``wandb.agent`` the
            sweep controller supplies the hyper-parameters instead.
    """
    with wandb.init(config=config):
        # Under wandb.agent the effective config comes from the controller.
        config = wandb.config
        criterion = model_parts.HuberLoss().cuda()
        net = build_model()
        train_loader, val_loader = build_loader(BATCH_SIZE)
        optimizer = build_optimizer(net, config)
        for epoch in tq.tqdm(range(EPOCHS), total=EPOCHS, desc='Epochs'):
            avg_loss = train_module.train_epoch(net, optimizer, criterion,
                                                train_loader, "cuda:0")
            val_loss = train_module.val_epoch(net, criterion, val_loader,
                                              "cuda:0")
            wandb.log({"loss": avg_loss, "loss_val": val_loss, "epoch": epoch})
wandb.login()
# Adam has no momentum hyper-parameter, so rather than masking momentum in a
# single grid, the search is split into two sweeps: SGD (with a momentum grid)
# and Adam (learning rate only).
sgd = {
    'method': 'grid',
    'metric': {
        'name': 'loss',
        'goal': 'minimize'
    },
    'name': 'unet-sweep-sgd',
    'parameters': {
        'learning_rate': {'values': [0.00001, 0.0005, 0.0001, 0.001, 0.01, 0.1]},
        'momentum': {'values': [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]},
        'optimizer': {'values': ["sgd"]}
    }
}
sweep_id = wandb.sweep(sweep=sgd, project=PROJECT_NAME)
wandb.agent(sweep_id, train, project=PROJECT_NAME)
adam = {
    'method': 'grid',
    'metric': {
        'name': 'loss',
        'goal': 'minimize'
    },
    'name': 'unet-sweep-adam',
    'parameters': {
        'learning_rate': {'values': [0.00001, 0.0005, 0.0001, 0.001, 0.01, 0.1]},
        'optimizer': {'values': ["adam"]}
    }
}
sweep_id = wandb.sweep(sweep=adam, project=PROJECT_NAME)
# Fix: the Adam agent previously omitted project=..., unlike the SGD agent
# above; without it the agent resolves the sweep against the default project,
# which can mismatch the project the sweep was registered under.
wandb.agent(sweep_id, train, project=PROJECT_NAME)