-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtrain.py
185 lines (147 loc) · 5.34 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import os
from argparse import Namespace
from args import Custom_arguments_parser
import random
import wandb
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm import tqdm
from model import model_selection
from util.data import CustomDataset, HoDataLoad # hobbang: Dataset, DataLoader 코드 하나로 합체
from util.augmentation import TransformSelector
from util.optimizers import get_optimizer
from util.losses import CustomLoss
from util.schedulers import get_scheduler
from trainer import Trainer
from model.model_selection import ModelSelector
def run_train(args: Namespace) -> None:
    """Run one complete training session configured by ``args``.

    Reads the training CSV, makes a stratified 80/20 train/validation split,
    builds the datasets, data loaders, model, optimizer, loss and scheduler,
    starts a Weights & Biases run, and hands everything to ``Trainer.train()``.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``device``, ``early_stopping``, ``data_root``, ``csv_path``,
            ``transform_type``, ``stratify``, ``height``, ``width``,
            ``augmentations``, ``adjust_ratio``, ``model``, ``loss``,
            ``optim``, ``epochs``, ``batch``, ``lr``, ``num_classes``,
            ``r_epochs``, ``resume``, ``checkpoint_path``, ``verbose``,
            ``lr_scheduler``, ``lr_scheduler_gamma`` and
            ``lr_scheduler_epochs_per_decay``.

    Raises:
        ValueError: If ``args.loss`` is anything other than ``'CE'``, which is
            the only loss wired up in this function.
    """
    ## Device and early-stopping settings
    device = torch.device(args.device)
    early_stopping = args.early_stopping

    ## Data root, CSV path and output directory
    data_root = args.data_root
    train_data_dir = data_root + "/train/"
    train_data_info_file = args.csv_path
    save_result_path = "./train_result"

    ## Augmentation settings
    transform_type = args.transform_type
    stratify_column = args.stratify
    height = args.height
    width = args.width

    ## Model, optimizer and loss selection
    model_type = args.model
    loss_type = args.loss
    optimizer_type = args.optim

    # Fail fast on an unsupported loss instead of hitting an unbound `loss`
    # variable after the data and model have already been built.
    if loss_type != 'CE':
        raise ValueError(
            f"Unsupported loss type {loss_type!r}; only 'CE' is implemented."
        )

    ## Learning rate, class count, epochs, batch size, repeat-epoch setting
    epochs = args.epochs
    batch_size = args.batch
    lr = args.lr
    num_classes = args.num_classes
    r_epoch = args.r_epochs

    ## Resume-from-checkpoint information
    resume = args.resume
    weights_path = args.checkpoint_path

    ## Output related (progress bar)
    verbose = args.verbose

    ## Transforms and train/validation split
    transform_selector = TransformSelector(transform_type=transform_type)

    train_info = pd.read_csv(train_data_info_file)
    # Fixed random_state keeps the split identical across runs.
    train_df, val_df = train_test_split(
        train_info,
        test_size=0.2,
        stratify=train_info[stratify_column],
        random_state=42,
    )

    train_transform = transform_selector.get_transform(
        augment=True,
        height=height,
        width=width,
        augment_list=args.augmentations,
        adjust_ratio=args.adjust_ratio,
    )
    val_transform = transform_selector.get_transform(
        augment=False,
        height=height,
        width=width,
        augment_list=args.augmentations,
        adjust_ratio=args.adjust_ratio,
    )

    # Both datasets read images from the same directory; only the rows differ.
    train_dataset = CustomDataset(train_data_dir, train_df, transform=train_transform)
    val_dataset = CustomDataset(train_data_dir, val_df, transform=val_transform)

    # Reshuffle the training samples every epoch so batches are not always
    # composed of the same rows in the same order.
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    # Validation order does not affect the metrics, so keep it fixed.
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
    )

    ## Model
    if 'timm' in model_type:
        # A timm model is requested as "<prefix>-<timm_model_name>"; the part
        # after the last '-' is passed on as the model name.
        model_selector = ModelSelector(
            "timm",
            num_classes,
            model_name=model_type.split("-")[-1],
            pretrained=True,
        )
    else:
        model_selector = ModelSelector(
            model_type,
            num_classes,
        )
    model = model_selector.get_model()
    model.to(device)

    ## Optimizer
    optimizer = get_optimizer(model, optimizer_type, lr)

    ## Loss function (loss_type was validated above)
    loss = CustomLoss()

    ## Scheduler
    scheduler = get_scheduler(
        args.lr_scheduler,
        optimizer,
        args.lr_scheduler_gamma,
        args.lr_scheduler_epochs_per_decay,
        len(train_dataloader),
    )

    ## Experiment tracking
    config = {
        'epoches': epochs,
        'batch_size': batch_size,
        'learning_rate': lr,
        'model': model,
        'device': device,
        'optimizer': optimizer,
        'scheduler': scheduler,
        'loss_fn': loss,
    }
    wandb.init(project='Project1', config=config)

    ## Start training
    trainer = Trainer(
        model=model,
        device=device,
        resume=resume,
        weights_path=weights_path,
        train_loader=train_dataloader,
        val_loader=val_dataloader,
        optimizer=optimizer,
        scheduler=scheduler,
        loss_fn=loss,
        epochs=epochs,
        result_path=save_result_path,
        train_total=train_df.shape[0],
        val_total=val_df.shape[0],
        r_epoch=r_epoch,
        early_stopping=early_stopping,
        verbose=verbose,
        args=args,
    )
    trainer.train()
if __name__ == '__main__':
    ## Load settings and hyperparameters from the command line
    train_parser = Custom_arguments_parser(mode='train')
    args = train_parser.get_parser()

    # Resolve the device used for seeding. CUDA must actually be available
    # when it was requested; raise explicitly because an `assert` would be
    # stripped when Python runs with -O.
    if args.device.lower() == 'cuda':
        if not torch.cuda.is_available():
            raise RuntimeError('cuda로 수행하려고 하였으나 cuda를 찾을 수 없습니다.')
        device = 'cuda'
        # The check above is case-insensitive, so hand run_train() the
        # canonical lowercase spelling that torch.device() is given.
        args.device = device
    else:
        device = 'cpu'

    # Fix every random seed for reproducibility
    seed = args.seed
    random.seed(seed)        # Python's built-in RNG
    np.random.seed(seed)     # NumPy RNG
    torch.manual_seed(seed)  # PyTorch RNG
    if device == 'cuda':
        torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    run_train(args)