Merge pull request #10 from pytorch-lumo/dev1
Weekly update 2023.03.17
sailist authored Mar 17, 2023
2 parents 575b1c3 + 13eef3a commit 6501baf
Showing 59 changed files with 3,051 additions and 873 deletions.
336 changes: 211 additions & 125 deletions README.ch.md

Large diffs are not rendered by default.

310 changes: 203 additions & 107 deletions README.md

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions docstr-coverage.sh
@@ -0,0 +1,9 @@
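# Report docstring coverage for the main lumo packages
# (requires `pip install docstr-coverage`; run with `bash docstr-coverage.sh`).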
docstr-coverage \
src/lumo/cli \
src/lumo/core \
src/lumo/data \
src/lumo/exp \
src/lumo/proc \
src/lumo/trainer \
src/lumo/utils

35 changes: 0 additions & 35 deletions examples/data/datamodule.py

This file was deleted.

259 changes: 259 additions & 0 deletions examples/imagenet.py
@@ -0,0 +1,259 @@
import sys
from pathlib import Path
from typing import Union

import torch
from PIL import Image
from torch.utils.data import DataLoader
import os
import torch.multiprocessing as mp
from torchvision.datasets.folder import default_loader

from lumo import DatasetBuilder, MetricType, Trainer, TrainerParams, Meter, callbacks, DataModule
from torchvision.datasets import FakeData, ImageFolder
from torchvision import transforms
from torchvision.models.resnet import resnet18
from torch import nn
from lumo.proc.dist import is_dist, is_main
from torch.nn import functional as F
from lumo.proc import glob
from lumo.utils.subprocess import run_command

"""define transforms"""


def none(mean, std):
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
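# Note: real ImageNet validation images vary in size, so batching them with
# ToTensor + Normalize alone would fail; an eval pipeline would normally add
# Resize(256) + CenterCrop(224) first. The FakeData path below sidesteps this
# by generating fixed-size 224x224 images.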


def standard(mean, std, resize=None):
    return transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])


"""create datasets"""


def imagenet(split='train'):
    """
    download from https://www.kaggle.com/c/imagenet-object-localization-challenge/overview/description
    ```
    mkdir imagenet
    cd ./imagenet
    kaggle competitions download -c imagenet-object-localization-challenge
    unzip imagenet-object-localization-challenge.zip
    tar -xvf imagenet_object_localization_patched2019.tar.gz
    ls
    >>> ILSVRC  LOC_synset_mapping.txt  LOC_val_solution.csv  imagenet_object_localization_patched2019.tar.gz
    >>> LOC_sample_submission.csv  LOC_train_solution.csv  imagenet-object-localization-challenge.zip
    ```
    """
    root = glob['imagenet']
    if split == 'train':
        file = Path(root).joinpath('ILSVRC', 'ImageSets', 'CLS-LOC', 'train_cls.txt')
        train_root = os.path.join(root, 'ILSVRC/Data/CLS-LOC/train')
        with file.open('r') as r:
            lines = r.readlines()
        imgs = [line.split(' ')[0] for line in lines]
        name_cls_map = {name: i for i, name in enumerate(sorted(set([i.split('/')[0] for i in imgs])))}
        xs = [os.path.join(train_root, f'{i}.JPEG') for i in imgs]
        ys = [name_cls_map[i.split('/')[0]] for i in imgs]
    else:
        file = Path(root).joinpath('LOC_val_solution.csv')
        val_root = os.path.join(root, 'ILSVRC/Data/CLS-LOC/val')

        with file.open('r') as r:
            r.readline()  # skip the csv header
            lines = r.readlines()
        lines = [line.split(',') for line in lines]
        lines = [[img, res.split(' ')[0]] for img, res in lines]

        name_cls_map = {name: i for i, name in enumerate(sorted(set([i[1] for i in lines])))}
        xs = [os.path.join(val_root, f'{img}.JPEG') for img, _ in lines]
        ys = [name_cls_map[res] for _, res in lines]

    return list(xs), list(ys)


def take_first(item):
    return item[0]


def take_second(item):
    return item[1]
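# Top-level functions are used instead of lambdas so the transforms stay
# picklable when the DataLoader spawns worker processes.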


def make_dataset(dummy=False):
    if dummy:
        train_dataset = FakeData(1281167, (3, 224, 224), 1000, transforms.ToTensor())
        val_dataset = FakeData(50000, (3, 224, 224), 1000, transforms.ToTensor())
        ds = (
            DatasetBuilder()
            .add_input('fake', train_dataset)
            .add_output('fake', 'xs', transform=take_first)
            .add_output('fake', 'ys', transform=take_second)
        )
        test_ds = (
            DatasetBuilder()
            .add_input('fake', val_dataset)
            .add_output('fake', 'xs', transform=take_first)
            .add_output('fake', 'ys', transform=take_second)
        )
    else:
        train_dataset = ImageFolder(os.path.join(glob['imagenet'], 'train'))
        val_dataset = ImageFolder(os.path.join(glob['imagenet'], 'val'))

        xs, ys = list(zip(*train_dataset.samples))
        test_xs, test_ys = list(zip(*val_dataset.samples))

        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        ds = (
            DatasetBuilder()
            .add_input('xs', xs, transform=default_loader)  # register the sample source, named 'xs'
            .add_input('ys', ys)  # register the label source, named 'ys'
            .add_output('xs', 'xs', transform=standard(mean, std))  # add an augmented output 'xs'
            .add_output('ys', 'ys')  # add the label output
        )

        print(ds)
        print(ds[0].keys())

        test_ds = (
            DatasetBuilder()
            .add_input('xs', test_xs, transform=default_loader)  # register the sample source, named 'xs'
            .add_input('ys', test_ys)  # register the label source, named 'ys'
            .add_output('xs', 'xs', transform=none(mean, std))  # no augmentation for test samples
            .add_output('ys', 'ys')  # add the label output
        )

        print(test_ds)
        print(test_ds[0].keys())
    return ds, test_ds
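# A minimal usage sketch (assuming lumo's DatasetBuilder yields dict samples
# keyed by the registered output names, as the prints above suggest):
#
#     ds, test_ds = make_dataset(dummy=True)
#     sample = ds[0]  # e.g. {'xs': <image tensor>, 'ys': <class label>}
#     loader = ds.DataLoader(batch_size=4)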


class LargeParams(TrainerParams):
    def __init__(self):
        super().__init__()
        self.optim = self.OPTIM.create_optim('SGD',
                                             lr=0.06,
                                             momentum=0.9,
                                             weight_decay=5e-5)
        self.lr_decay_end = 0.00001
        self.batch_size = 512
        self.dummy = False
        self.multiprocessing_distributed = True


ParamsType = LargeParams


class LargeModel(nn.Module):

    def __init__(self, feature_dim) -> None:
        super().__init__()
        self.backbone = resnet18()
        in_feature = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        self.head = nn.Linear(in_feature, feature_dim, bias=True)

    def forward(self, xs):
        feature_map = self.backbone(xs)
        feature = self.head(feature_map)
        return feature


class LargeTrainer(Trainer):

    def icallbacks(self, params: ParamsType):
        callbacks.LoggerCallback().hook(self)

    def imodels(self, params: ParamsType):
        self.model = resnet18(num_classes=1000)
        self.optim = params.optim.build(self.model.parameters())

        self.lr_sche = params.SCHE.Cos(
            start=params.optim.lr, end=params.lr_decay_end,
            left=0,
            right=len(self.train_dataloader) * params.epoch
        )
        # manually trigger send_to_device method
        self.to_device()

    def train_step(self, batch, params: ParamsType = None) -> MetricType:
        super().train_step(batch, params)
        m = Meter()
        xs, ys = batch['xs'], batch['ys']
        logits = self.model(xs)

        Lall = F.cross_entropy(logits, ys)
        self.optim.zero_grad()
        self.accelerate.backward(Lall)
        self.optim.step()

        # change lr by training epoch
        cur_lr = self.lr_sche.apply(self.optim, self.eidx)

        with torch.no_grad():
            m.mean.Lall = Lall
            m.mean.Ax = torch.eq(logits.argmax(dim=-1), ys).float().mean()
            m.last.lr = cur_lr
        return m

    def test_step(self, batch, params: ParamsType = None) -> MetricType:
        m = Meter()
        xs, ys = batch['xs'], batch['ys']
        logits = self.model(xs)

        all_logits = self.accelerate.gather(logits)
        all_ys = self.accelerate.gather(ys)

        m.test_mean.Ax = torch.eq(all_logits.argmax(dim=-1), all_ys).float()
        return m


def main_worker(device, ngpus_per_node, args):
    # create a DataModule to hold the dataloaders
    params = LargeParams()
    params.device = device
    params.from_args()

    ds, test_ds = make_dataset(dummy=params.dummy)
    dl = ds.DataLoader(batch_size=params.batch_size, num_workers=4)
    test_dl = test_ds.DataLoader(batch_size=params.batch_size, num_workers=4)
    dm = DataModule()
    dm.regist_dataloader(train=dl, test=test_dl)

    # Given the params and dataloaders, the Trainer initializes its models and
    # optimizers; training then produces the trained parameters, metrics and logs.
    trainer = LargeTrainer(params, dm=dm)

    trainer.train()  # or trainer.train(dm=dl) if dm is not given above
    trainer.test()  # or trainer.test(dm=dl)
    trainer.save_last_model()


def main():
    # if params.multiprocessing_distributed and not is_dist():
    #     mp.spawn(main, nprocs=torch.cuda.device_count(), args=(ngpus_per_node, args))
    #     command = ' '.join([
    #         'accelerate', 'launch',
    #         *sys.argv,
    #     ])
    #     print(command)
    #     run_command(command)
    # else:  # not distributed, or already inside a distributed environment
    # Single-process fallback so the script runs as-is; the distributed branch
    # above is left as a sketch (assumption: params.device accepts a torch
    # device string):
    main_worker('cuda' if torch.cuda.is_available() else 'cpu', 1, None)


if __name__ == '__main__':
    main()