Skip to content

Commit

Permalink
Add pytorch implementation for vggnet
Browse files Browse the repository at this point in the history
  • Loading branch information
hassanmohsin committed Oct 22, 2019
1 parent ab4f9bd commit c3e3a15
Show file tree
Hide file tree
Showing 11 changed files with 144 additions and 1,938 deletions.
1 change: 1 addition & 0 deletions pytorch/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
results/
File renamed without changes.
46 changes: 30 additions & 16 deletions torch/main.py → pytorch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
from model import cnnscore
#from torch.utils.tensorboard import SummaryWriter
from model import cnnscore, vggnet
from dataset import CustomDataset

parser = argparse.ArgumentParser(description='PyTorch DLSCORE-CNN Training')
Expand Down Expand Up @@ -63,7 +64,7 @@
'multi node data parallel training')

best_loss = np.inf

#writer = SummaryWriter('runs/')

def main():
args = parser.parse_args()
Expand Down Expand Up @@ -94,7 +95,8 @@ def main():
args.world_size = ngpus_per_node * args.world_size
# Use torch.multiprocessing.spawn to launch distributed processes: the
# main_worker process function
mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
mp.spawn(main_worker, nprocs=ngpus_per_node,
args=(ngpus_per_node, args))
else:
# Simply call main_worker function
main_worker(args.gpu, ngpus_per_node, args)
Expand All @@ -118,7 +120,10 @@ def main_worker(gpu, ngpus_per_node, args):
world_size=args.world_size, rank=args.rank)
# create model
print("=> creating model '{}'".format('cnnscore'))
model = cnnscore()
#model = cnnscore()
model = vggnet()
print(model)


if args.distributed:
# For multiprocessing distributed, DistributedDataParallel constructor
Expand All @@ -131,8 +136,10 @@ def main_worker(gpu, ngpus_per_node, args):
# DistributedDataParallel, we need to divide the batch size
# ourselves based on the total number of GPUs we have
args.batch_size = int(args.batch_size / ngpus_per_node)
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
args.workers = int(
(args.workers + ngpus_per_node - 1) / ngpus_per_node)
model = torch.nn.parallel.DistributedDataParallel(
model, device_ids=[args.gpu])
else:
model.cuda()
# DistributedDataParallel will divide and allocate batch_size to all
Expand All @@ -148,7 +155,8 @@ def main_worker(gpu, ngpus_per_node, args):
# define loss function (criterion) and optimizer

criterion = nn.MSELoss().cuda(args.gpu)
optimizer = torch.optim.Adam(model.parameters(), args.lr, betas=(0.9, 0.999))
optimizer = torch.optim.Adam(
model.parameters(), args.lr, betas=(0.9, 0.999))

# optionally resume from a checkpoint
if args.resume:
Expand All @@ -163,6 +171,7 @@ def main_worker(gpu, ngpus_per_node, args):
args.start_epoch = checkpoint['epoch']
best_loss = checkpoint['best_loss']
if args.gpu is not None:
print(args.gpu)
# best_loss may be from a checkpoint from a different GPU
best_loss = best_loss.to(args.gpu)
model.load_state_dict(checkpoint['state_dict'])
Expand All @@ -181,14 +190,15 @@ def main_worker(gpu, ngpus_per_node, args):
train_dataset = CustomDataset(train_dir)
test_dataset = CustomDataset(val_dir)


if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
train_sampler = torch.utils.data.distributed.DistributedSampler(
train_dataset)
else:
train_sampler = None

train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
train_dataset, batch_size=args.batch_size, shuffle=(
train_sampler is None),
num_workers=args.workers, pin_memory=True, sampler=train_sampler)

val_loader = torch.utils.data.DataLoader(
Expand Down Expand Up @@ -216,7 +226,7 @@ def main_worker(gpu, ngpus_per_node, args):
best_loss = min(loss, best_loss)

if not args.multiprocessing_distributed or (args.multiprocessing_distributed
and args.rank % ngpus_per_node == 0):
and args.rank % ngpus_per_node == 0):
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
Expand Down Expand Up @@ -247,7 +257,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args):
target = target.cuda(args.gpu, non_blocking=True)

# compute output
output = model(input)
output, _ = model(input)
loss = criterion(output, target)

# measure and record loss
Expand All @@ -264,7 +274,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args):

if i % args.print_freq == 0:
progress.display(i)


def pearsonr(x, y):
"""
Expand Down Expand Up @@ -311,9 +321,11 @@ def validate(val_loader, model, criterion, args):
target = target.cuda(args.gpu, non_blocking=True)

# compute output
output = model(input)
output, last_layer_features = model(input)
#print(last_layer_features.shape)
np.save('l_features_' + str(i), last_layer_features)
loss = criterion(output, target)

# measure pearsonr and record loss
actual_values[i] = target.mean()
predicted_values[i] = output.mean()
Expand All @@ -329,7 +341,8 @@ def validate(val_loader, model, criterion, args):

pr = pearsonr(actual_values, predicted_values).item()
mseloss = criterion(predicted_values, actual_values).item()
print('Test: [{0}/{0}]\t Pearson R: {1:.4f}\t MSE loss: {2:.4f}'.format(len(val_loader), pr, mseloss))
print('Test: [{0}/{0}]\t Pearson R: {1:.4f}\t MSE loss: {2:.4f}'.format(
len(val_loader), pr, mseloss))

return mseloss

Expand All @@ -342,6 +355,7 @@ def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):

class AverageMeter(object):
"""Computes and stores the average and current value"""

def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
Expand Down
124 changes: 103 additions & 21 deletions torch/model.py → pytorch/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,93 +2,175 @@
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['cnnscore']
__all__ = ['cnnscore', 'vggnet']

# Example VGGNet https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py


class VGGNet(nn.Module):
    """VGG-style regression network on top of a caller-supplied feature stack.

    The ``features`` module is applied first and its output is flattened per
    sample. The flattened vector must have 512 values, because the first
    fully connected layer is ``nn.Linear(512, 4096)``.

    Args:
        features: Module applied to the input before flattening.
        init_weights: When true, re-initialise Conv3d, BatchNorm3d and Linear
            layers through ``_initialize_weights``.
    """

    def __init__(self, features, init_weights=False):
        super(VGGNet, self).__init__()

        self.features = features
        # TODO: consider an adaptive pooling layer between ``features`` and
        # ``lastblock`` (the 2D reference uses nn.AdaptiveAvgPool2d((7, 7))).
        self.lastblock = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(True),
            # TODO: dropout is not enabled between the dense layers yet.
            nn.Linear(4096, 4096),
            nn.ReLU(True),
        )
        # Final regression layer, kept separate from ``lastblock`` so that
        # forward() can expose the activations that feed it.
        self.dense = nn.Linear(4096, 1)
        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every Conv3d, BatchNorm3d and Linear submodule.

        Conv3d weights use Kaiming-normal (fan_out, relu), BatchNorm3d starts
        as the identity transform (weight 1, bias 0), and Linear weights are
        drawn from N(0, 0.01). All biases are set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm3d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the prediction together with the penultimate activations.

        Returns:
            A tuple ``(output, last_layer_features)``. ``output`` is the
            tensor produced by the final ``dense`` layer;
            ``last_layer_features`` is a NumPy array (detached from autograd,
            on the CPU) holding the activations fed into that layer.
        """
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.lastblock(x)
        # Detach before moving to the CPU so the device copy is not recorded
        # in the autograd graph.
        last_layer_features = x.detach().cpu().numpy()
        x = self.dense(x)

        return x, last_layer_features


class CNNScore(nn.Module):
    """3D CNN built from "fire" blocks that regresses a single score.

    Each fire block is a 1x1x1 squeeze convolution followed by parallel
    1x1x1 and 3x3x3 expand convolutions whose ReLU outputs are concatenated
    along the channel axis. The input must have 16 channels, and its spatial
    size must reduce to 2x2x2 after the final average pool (for example a
    24x24x24 grid), because the dense layer expects 512*2*2*2 features.
    """

    def __init__(self):
        super(CNNScore, self).__init__()
        self.conv1 = nn.Conv3d(16, 96, kernel_size=1, stride=2)

        # For the 3x3x3 expand convolutions, padding = (k - 1) / 2 keeps the
        # spatial size unchanged so both expand branches can be concatenated.
        self.fire2_squeeze = nn.Conv3d(96, 16, kernel_size=1)
        self.fire2_expand1 = nn.Conv3d(16, 64, kernel_size=1)
        self.fire2_expand2 = nn.Conv3d(16, 64, kernel_size=3, padding=1)

        self.fire3_squeeze = nn.Conv3d(128, 16, kernel_size=1)
        self.fire3_expand1 = nn.Conv3d(16, 64, kernel_size=1)
        self.fire3_expand2 = nn.Conv3d(16, 64, kernel_size=3, padding=1)

        self.fire4_squeeze = nn.Conv3d(128, 32, kernel_size=1)
        self.fire4_expand1 = nn.Conv3d(32, 128, kernel_size=1)
        self.fire4_expand2 = nn.Conv3d(32, 128, kernel_size=3, padding=1)

        self.pool = nn.MaxPool3d(kernel_size=3, stride=2)

        self.fire5_squeeze = nn.Conv3d(256, 32, kernel_size=1)
        self.fire5_expand1 = nn.Conv3d(32, 128, kernel_size=1)
        self.fire5_expand2 = nn.Conv3d(32, 128, kernel_size=3, padding=1)

        self.fire6_squeeze = nn.Conv3d(256, 48, kernel_size=1)
        self.fire6_expand1 = nn.Conv3d(48, 192, kernel_size=1)
        self.fire6_expand2 = nn.Conv3d(48, 192, kernel_size=3, padding=1)

        self.fire7_squeeze = nn.Conv3d(384, 48, kernel_size=1)
        self.fire7_expand1 = nn.Conv3d(48, 192, kernel_size=1)
        self.fire7_expand2 = nn.Conv3d(48, 192, kernel_size=3, padding=1)

        self.fire8_squeeze = nn.Conv3d(384, 64, kernel_size=1)
        self.fire8_expand1 = nn.Conv3d(64, 256, kernel_size=1)
        self.fire8_expand2 = nn.Conv3d(64, 256, kernel_size=3, padding=1)

        self.avg_pool = nn.AvgPool3d(kernel_size=3, padding=1)

        self.dense1 = nn.Linear(512*2*2*2, 1)

    @staticmethod
    def _fire(x, squeeze, expand1, expand2):
        """Apply one fire block and return the channel-concatenated expands."""
        squeezed = F.relu(squeeze(x))
        return torch.cat(
            (F.relu(expand1(squeezed)), F.relu(expand2(squeezed))), 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of predicted scores for ``x``."""
        x = F.relu(self.conv1(x))

        x = self._fire(x, self.fire2_squeeze,
                       self.fire2_expand1, self.fire2_expand2)
        x = self._fire(x, self.fire3_squeeze,
                       self.fire3_expand1, self.fire3_expand2)
        x = self._fire(x, self.fire4_squeeze,
                       self.fire4_expand1, self.fire4_expand2)
        x = self.pool(x)

        x = self._fire(x, self.fire5_squeeze,
                       self.fire5_expand1, self.fire5_expand2)
        x = self._fire(x, self.fire6_squeeze,
                       self.fire6_expand1, self.fire6_expand2)
        x = self._fire(x, self.fire7_squeeze,
                       self.fire7_expand1, self.fire7_expand2)
        x = self._fire(x, self.fire8_squeeze,
                       self.fire8_expand1, self.fire8_expand2)

        x = self.avg_pool(x)
        x = x.view(-1, 512*2*2*2)
        return self.dense1(x)


def cnnscore(**kwargs):
    """Build a CNNScore model, forwarding keyword arguments to its constructor."""
    return CNNScore(**kwargs)


# Layer configurations consumed by make_layers: an integer adds a 3D
# convolution with that many output channels, 'M' adds a max-pooling layer.
cfgs = {
'A': [64, 64, 64, 'M', 128, 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M']
}


def make_layers(cfg, batch_norm=True, in_channels=16):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Sequence of layer specs. ``'M'`` appends
            ``nn.MaxPool3d(kernel_size=2, stride=2)``; any other entry is the
            output-channel count of a 3x3x3 ``nn.Conv3d`` with padding 1.
        batch_norm: When true, insert ``nn.BatchNorm3d`` between each
            convolution and its ReLU.
        in_channels: Channel count of the network input. Defaults to 16, the
            value that was previously hard-coded.

    Returns:
        An ``nn.Sequential`` containing the layers in order.
    """
    layers = []

    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool3d(kernel_size=2, stride=2))
            continue
        # padding = (k - 1) // 2 keeps the spatial size unchanged.
        layers.append(nn.Conv3d(in_channels, v, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm3d(v))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes this layer's output channels.
        in_channels = v

    return nn.Sequential(*layers)


def vggnet(**kwargs):
    """Build a VGGNet from configuration 'A' with batch normalisation.

    Keyword arguments are forwarded to the VGGNet constructor. Weight
    initialisation is enabled unless the caller passes ``init_weights``
    explicitly.
    """
    # TODO: The last max-pool layer shrinks the feature map from 3 to 1 per
    #       spatial axis (for 24x24x24 inputs). Fix it.
    # TODO: Revisit the head: two 4096-unit dense layers feed a single output.
    # TODO: Use dropout to regularize; training loss looks much lower than
    #       validation loss.

    # setdefault (rather than a literal keyword) lets callers override
    # init_weights without triggering a duplicate-keyword TypeError.
    kwargs.setdefault('init_weights', True)
    return VGGNet(make_layers(cfgs['A'], batch_norm=True), **kwargs)
7 changes: 7 additions & 0 deletions pytorch/model_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import torch
from model import vggnet
from torchsummary import summary

# Print a torchsummary report for vggnet on a 16-channel 24x24x24 input,
# placing the model on the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = vggnet()
model = model.to(device)
summary(model, input_size=(16, 24, 24, 24))
File renamed without changes.
1 change: 1 addition & 0 deletions pytorch/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Run main.py on ../dataset with --evaluate, resuming from model_best.pth.tar.
# NOTE(review): -j 8 is presumably the data-loader worker count; confirm against main.py's argument parser.
python main.py ../dataset --multiprocessing-distributed -j 8 --resume model_best.pth.tar --evaluate
2 changes: 2 additions & 0 deletions pytorch/train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Alternative invocation with an explicit --dist-url and --world-size 2 (2 epochs), kept for reference:
#python main.py ../dataset --dist-url 'tcp://chanti00.utep.edu:12356' --world-size 2 --multiprocessing-distributed -j 8 --batch-size 128 --epochs 2
# Default run: main.py on ../dataset with --batch-size 128 and --epochs 100.
# NOTE(review): -j 8 is presumably the data-loader worker count; confirm against main.py's argument parser.
python main.py ../dataset --multiprocessing-distributed -j 8 --batch-size 128 --epochs 100
28 changes: 0 additions & 28 deletions torch/dataset.py

This file was deleted.

Loading

0 comments on commit c3e3a15

Please sign in to comment.