Skip to content

Commit

Permalink
perf(train): 使用epoch-iteration替代iteration训练模式;更新日志格式
Browse files Browse the repository at this point in the history
  • Loading branch information
zjykzj committed Nov 5, 2020
1 parent d518e3d commit a8494f4
Show file tree
Hide file tree
Showing 18 changed files with 224 additions and 146 deletions.
10 changes: 5 additions & 5 deletions configs/tsn_mbv2_ucf101_rgb_raw_dense_1x16x4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_mbv2_ucf101_rgb_raw_dense_1x16x4'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_hmdb51_rgb_raw_seg_1x1x3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_hmdb51_rgb_raw_seg_1x1x3'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_hmdb51_rgbdiff_raw_seg_1x1x3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_hmdb51_rgbdiff_raw_seg_1x1x3'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_jester_rgb_raw_seg_1x1x3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_jester_rgb_raw_seg_1x1x3'
TRAIN:
MAX_ITER: 100000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -78,7 +78,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 45000, 85000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 1000
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_jester_rgbdiff_raw_seg_1x1x3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_jester_rgbdiff_raw_seg_1x1x3'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -78,7 +78,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 45000, 85000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 1000
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_ucf101_rgb_raw_dense_1x16x4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_ucf101_rgb_raw_dense_1x16x4'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 5
EVAL_EPOCH: 5
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -81,7 +81,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_ucf101_rgb_raw_seg_1x1x3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_ucf101_rgb_raw_seg_1x1x3'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_ucf101_rgb_video_dense_1x16x4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_ucf101_rgb_video_dense_1x16x4'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_r50_ucf101_rgbdiff_raw_seg_1x1x3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_r50_ucf101_rgbdiff_raw_seg_1x1x3'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.2
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
10 changes: 5 additions & 5 deletions configs/tsn_sfv2_ucf101_rgb_raw_dense_1x16x4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ DIST_BACKEND: "nccl"
RNG_SEED: 1
OUTPUT_DIR: 'outputs/tsn_mbv2_ucf101_rgb_raw_dense_1x16x4'
TRAIN:
MAX_ITER: 30000
LOG_STEP: 10
SAVE_STEP: 1000
EVAL_STEP: 1000
MAX_EPOCH: 100
SAVE_EPOCH: 10
EVAL_EPOCH: 10
RESUME: False
USE_TENSORBOARD: True
DATASETS:
Expand Down Expand Up @@ -80,7 +80,7 @@ LR_SCHEDULER:
IS_WARMUP: True
GAMMA: 0.1
MULTISTEP_LR:
MILESTONES: [ 15000, 25000 ]
MILESTONES: [ 50, 80 ]
WARMUP:
ITERATION: 400
ITERATION: 5
MULTIPLIER: 1.0
7 changes: 4 additions & 3 deletions tools/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def train(cfg):

logger = logging.setup_logging(__name__)
logger.info('init start')
arguments = {"iteration": 0}
# 迭代轮数从1开始计数
arguments = {"cur_epoch": 1}

device = get_device(get_local_rank())
model = build_recognizer(cfg, device)
Expand All @@ -49,7 +50,7 @@ def train(cfg):
logger.info('resume start')
extra_checkpoint_data = checkpointer.load(map_location=device)
if isinstance(extra_checkpoint_data, dict):
arguments['iteration'] = extra_checkpoint_data['iteration']
arguments['cur_epoch'] = extra_checkpoint_data['cur_epoch']
if cfg.LR_SCHEDULER.IS_WARMUP:
logger.info('warmup start')
if lr_scheduler.finished:
Expand All @@ -61,7 +62,7 @@ def train(cfg):
logger.info('warmup end')
logger.info('resume end')

data_loader = build_dataloader(cfg, is_train=True, start_iter=arguments['iteration'])
data_loader = build_dataloader(cfg, is_train=True)

logger.info('init end')
synchronize()
Expand Down
12 changes: 6 additions & 6 deletions tsn/config/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
# Train
# ---------------------------------------------------------------------------- #
_C.TRAIN = CN()
_C.TRAIN.MAX_ITER = 30000
_C.TRAIN.LOG_STEP = 10
_C.TRAIN.SAVE_STEP = 1000
_C.TRAIN.EVAL_STEP = 1000
_C.TRAIN.SAVE_EPOCH = 10
_C.TRAIN.EVAL_EPOCH = 10
_C.TRAIN.MAX_EPOCH = 100
_C.TRAIN.RESUME = False
_C.TRAIN.USE_TENSORBOARD = True

Expand Down Expand Up @@ -150,14 +150,14 @@

# for SteLR
_C.LR_SCHEDULER.STEP_LR = CN()
_C.LR_SCHEDULER.STEP_LR.STEP_SIZE = 10000
_C.LR_SCHEDULER.STEP_LR.STEP_SIZE = 10
# for MultiStepLR
_C.LR_SCHEDULER.MULTISTEP_LR = CN()
_C.LR_SCHEDULER.MULTISTEP_LR.MILESTONES = [15000, 25000]
_C.LR_SCHEDULER.MULTISTEP_LR.MILESTONES = [50, 80]
# for CosineAnnealingLR
_C.LR_SCHEDULER.COSINE_ANNEALING_LR = CN()
_C.LR_SCHEDULER.COSINE_ANNEALING_LR.MINIMAL_LR = 3e-4
# for Warmup
_C.LR_SCHEDULER.WARMUP = CN()
_C.LR_SCHEDULER.WARMUP.ITERATION = 400
_C.LR_SCHEDULER.WARMUP.ITERATION = 5
_C.LR_SCHEDULER.WARMUP.MULTIPLIER = 1.0
40 changes: 24 additions & 16 deletions tsn/data/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,16 @@
@description:
"""

import torch
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import RandomSampler, SequentialSampler, BatchSampler

from .datasets.build import build_dataset
from .samplers import IterationBasedBatchSampler
from .transforms.build import build_transform
import tsn.util.distributed as du


def build_dataloader(cfg,
is_train=True,
start_iter=0):
def build_dataloader(cfg, is_train=True):
transform = build_transform(cfg, is_train=is_train)
dataset = build_dataset(cfg, transform=transform, is_train=is_train)

Expand All @@ -35,7 +32,7 @@ def build_dataloader(cfg,
rank=rank,
shuffle=True)
else:
sampler = torch.utils.data.RandomSampler(dataset)
sampler = RandomSampler(dataset)
else:
batch_size = cfg.DATALOADER.TEST_BATCH_SIZE
if num_gpus > 1:
Expand All @@ -44,19 +41,30 @@ def build_dataloader(cfg,
rank=rank,
shuffle=False)
else:
sampler = torch.utils.data.sampler.SequentialSampler(dataset)

batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler,
batch_size=batch_size,
drop_last=False)
if is_train:
batch_sampler = IterationBasedBatchSampler(batch_sampler,
num_iterations=cfg.TRAIN.MAX_ITER,
start_iter=start_iter)
sampler = SequentialSampler(dataset)

data_loader = DataLoader(dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
sampler=sampler,
batch_size=batch_size,
drop_last=False,
pin_memory=True)

return data_loader


def shuffle_dataset(loader, cur_epoch):
    """Shuffle the data for the given epoch.

    Args:
        loader (DataLoader): data loader whose sampler should be shuffled.
        cur_epoch (int): number of the current epoch (used to reseed
            DistributedSampler so every process draws the same permutation).

    Raises:
        AssertionError: if the loader's sampler is neither a RandomSampler
            nor a DistributedSampler.
    """
    sampler = loader.sampler
    assert isinstance(
        sampler, (RandomSampler, DistributedSampler)
    ), "Sampler type '{}' not supported".format(type(sampler))
    # RandomSampler reshuffles automatically on every iteration pass;
    # only DistributedSampler needs the epoch to reseed its shuffling.
    if isinstance(sampler, DistributedSampler):
        # DistributedSampler shuffles data based on epoch
        sampler.set_epoch(cur_epoch)
4 changes: 2 additions & 2 deletions tsn/engine/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def compute_on_dataset(images, targets, device, model, num_gpus, evaluator):


def inference(cfg, model, device, **kwargs):
iteration = kwargs.get('iteration', None)
cur_epoch = kwargs.get('cur_epoch', None)
dataset_name = cfg.DATASETS.TEST.NAME
num_gpus = cfg.NUM_GPUS

Expand All @@ -64,7 +64,7 @@ def inference(cfg, model, device, **kwargs):
output_dir = cfg.OUTPUT_DIR
result_path = os.path.join(output_dir,
'result_{}.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) \
if iteration is None else os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration))
if cur_epoch is None else os.path.join(output_dir, 'result_{:04d}.txt'.format(cur_epoch))

with open(result_path, "w") as f:
f.write(result_str)
Expand Down
Loading

0 comments on commit a8494f4

Please sign in to comment.