-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlog.txt
322 lines (320 loc) · 22.1 KB
/
log.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
nohup: ignoring input
/home/xmy/pytorch_code/crnn/utils/utils.py:10: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
config = yaml.load(f)
{'GPUID': 0, 'WORKERS': 1, 'PRINT_FREQ': 10, 'SAVE_FREQ': 1, 'EVAL_FREQ': 1, 'PIN_MEMORY': False, 'OUTPUT_DIR': 'output', 'BLANK_CHAR': '-', 'CUDNN': {'BENCHMARK': True, 'DETERMINISTIC': False, 'ENABLED': True}, 'DATASET': {'DATASET': '360CC', 'ROOT': '/home/xmy/pytorch_code/dataset/', 'CHAR_FILE': '/home/xmy/pytorch_code/dataset/french_img/french_dict.txt', 'JSON_FILE': {'train': 'french_img/french_training_mlt_2017.txt', 'val': 'french_img/french_eval_mlt_2017.txt'}, 'SCALE_FACTOR': 0.25, 'ROT_FACTOR': 30, 'STD': 0.193, 'MEAN': 0.588, 'ALPHABETS': ''}, 'TRAIN': {'BATCH_SIZE_PER_GPU': 32, 'SHUFFLE': True, 'BEGIN_EPOCH': 0, 'END_EPOCH': 10000, 'RESUME': {'IS_RESUME': False, 'FILE': ''}, 'OPTIMIZER': 'adam', 'LR': 0.0001, 'WD': 0.0, 'LR_STEP': [60, 80], 'LR_FACTOR': 0.1, 'MOMENTUM': 0.0, 'NESTEROV': False, 'RMSPROP_ALPHA': None, 'RMSPROP_CENTERED': None, 'FINETUNE': {'IS_FINETUNE': True, 'FINETUNE_CHECKPOINIT': 'output/checkpoints/mixed_second_finetune_acc_97P7.pth', 'FREEZE': True}}, 'TEST': {'BATCH_SIZE_PER_GPU': 16, 'SHUFFLE': True, 'NUM_TEST_BATCH': 1000, 'NUM_TEST_DISP': 10}, 'MODEL': {'NAME': 'crnn', 'IMAGE_SIZE': {'OW': 280, 'H': 32, 'W': 160}, 'NUM_CLASSES': 127, 'NUM_HIDDEN': 256}, 'ATTENTION': {'ENABLE': False}, 'DICT': ['-', '!', '"', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\xad', '°', '²', 'º', 'À', 'Á', 'Â', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Î', 'Ô', 'Ö', 'Ü', 'ß', 'à', 'á', 'â', 'ä', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'í', 'î', 'ï', 'ñ', 'ò', 'ó', 'ô', 'ö', 'ù', 'û', 'ü', 'Œ', 'œ', 'ε', '€', '™']}
load 1114039 images!
load 766 images!
128
layer name gradient parameters shape mu sigma
0 cnn.conv0.weight True 576 [64, 1, 3, 3] -0.000678 0.0196
1 cnn.conv0.bias True 64 [64] -0.0114 0.178
2 cnn.conv1.weight True 73728 [128, 64, 3, 3] 0.000118 0.02
3 cnn.conv1.bias True 128 [128] 0.00145 0.0239
4 cnn.conv2.weight True 294912 [256, 128, 3, 3] -7.72e-06 0.02
5 cnn.conv2.bias True 256 [256] 0.000772 0.0167
6 cnn.batchnorm2.weight True 256 [256] 1 0.0184
7 cnn.batchnorm2.bias True 256 [256] 0 0
8 cnn.conv3.weight True 589824 [256, 256, 3, 3] -1.67e-05 0.02
9 cnn.conv3.bias True 256 [256] -0.000214 0.0128
10 cnn.conv4.weight True 1.17965e+06 [512, 256, 3, 3] -2.26e-05 0.02
11 cnn.conv4.bias True 512 [512] -0.00056 0.0126
12 cnn.batchnorm4.weight True 512 [512] 0.999 0.0198
13 cnn.batchnorm4.bias True 512 [512] 0 0
14 cnn.conv5.weight True 2.3593e+06 [512, 512, 3, 3] -9.15e-06 0.02
15 cnn.conv5.bias True 512 [512] 0.000261 0.00822
16 cnn.conv6.weight True 1.04858e+06 [512, 512, 2, 2] -1.94e-06 0.02
17 cnn.conv6.bias True 512 [512] -0.000623 0.0127
18 cnn.batchnorm6.weight True 512 [512] 1 0.0197
19 cnn.batchnorm6.bias True 512 [512] 0 0
20 rnn.0.rnn.weight_ih_l0 True 524288 [1024, 512] 0.00013 0.0361
21 rnn.0.rnn.weight_hh_l0 True 262144 [1024, 256] 5.88e-05 0.0361
22 rnn.0.rnn.bias_ih_l0 True 1024 [1024] 0.00155 0.0361
23 rnn.0.rnn.bias_hh_l0 True 1024 [1024] -0.000865 0.0355
24 rnn.0.rnn.weight_ih_l0_reverse True 524288 [1024, 512] 2.82e-05 0.0361
25 rnn.0.rnn.weight_hh_l0_reverse True 262144 [1024, 256] 6.65e-05 0.0361
26 rnn.0.rnn.bias_ih_l0_reverse True 1024 [1024] 0.00142 0.0358
27 rnn.0.rnn.bias_hh_l0_reverse True 1024 [1024] 0.000104 0.037
28 rnn.0.embedding.weight True 131072 [256, 512] -1.59e-06 0.0256
29 rnn.0.embedding.bias True 256 [256] -0.000301 0.0268
30 rnn.1.rnn.weight_ih_l0 True 262144 [1024, 256] -0.000122 0.0361
31 rnn.1.rnn.weight_hh_l0 True 262144 [1024, 256] -3.48e-05 0.0361
32 rnn.1.rnn.bias_ih_l0 True 1024 [1024] -0.00031 0.0361
33 rnn.1.rnn.bias_hh_l0 True 1024 [1024] 0.000798 0.0363
34 rnn.1.rnn.weight_ih_l0_reverse True 262144 [1024, 256] 7.26e-05 0.0361
35 rnn.1.rnn.weight_hh_l0_reverse True 262144 [1024, 256] -8.81e-05 0.0361
36 rnn.1.rnn.bias_ih_l0_reverse True 1024 [1024] -0.000294 0.0361
37 rnn.1.rnn.bias_hh_l0_reverse True 1024 [1024] 0.000167 0.0358
38 rnn.1.embedding.weight True 65536 [128, 512] -1.5e-05 0.0256
39 rnn.1.embedding.bias True 128 [128] 0.000695 0.0261
40 attention.attention_cell.i2h.weight True 65536 [256, 256] -0.00016 0.0361
41 attention.attention_cell.h2h.weight True 65536 [256, 256] -0.000272 0.036
42 attention.attention_cell.h2h.bias True 256 [256] -0.00307 0.036
43 attention.attention_cell.score.weight True 256 [1, 256] -0.000658 0.0364
44 attention.attention_cell.rnn.weight_ih True 196608 [768, 256] -0.000151 0.0361
45 attention.attention_cell.rnn.weight_hh True 196608 [768, 256] 5.21e-05 0.0361
46 attention.attention_cell.rnn.bias_ih True 768 [768] -0.00031 0.036
47 attention.attention_cell.rnn.bias_hh True 768 [768] 1.84e-05 0.0363
48 attention.generator.weight True 32768 [128, 256] -1.67e-05 0.0361
49 attention.generator.bias True 128 [128] 0.00343 0.0374
Model Summary: 50 layers, 8.93722e+06 parameters, 8.93722e+06 gradients
CRNN(
(cnn): Sequential(
(conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu0): ReLU(inplace=True)
(pooling0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu1): ReLU(inplace=True)
(pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu3): ReLU(inplace=True)
(pooling2): MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
(conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(batchnorm4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu4): ReLU(inplace=True)
(conv5): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(relu5): ReLU(inplace=True)
(pooling3): MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1), dilation=1, ceil_mode=False)
(conv6): Conv2d(512, 512, kernel_size=(2, 2), stride=(1, 1))
(batchnorm6): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu6): ReLU(inplace=True)
)
(rnn): Sequential(
(0): BidirectionalLSTM(
(rnn): LSTM(512, 256, bidirectional=True)
(embedding): Linear(in_features=512, out_features=256, bias=True)
)
(1): BidirectionalLSTM(
(rnn): LSTM(256, 256, bidirectional=True)
(embedding): Linear(in_features=512, out_features=128, bias=True)
)
)
(attention): Attention(
(attention_cell): AttentionCell(
(i2h): Linear(in_features=256, out_features=256, bias=False)
(h2h): Linear(in_features=256, out_features=256, bias=True)
(score): Linear(in_features=256, out_features=1, bias=False)
(rnn): GRUCell(256, 256)
)
(generator): Linear(in_features=256, out_features=128, bias=True)
)
)
/home/xmy/.conda/envs/xmy_pytorch/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:417: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.
"please use `get_last_lr()`.", UserWarning)
epoch:0 step:0 loss:52.116085052490234 lr:[0.0001]
epoch:0 step:10 loss:40.117095947265625 lr:[0.0001]
epoch:0 step:20 loss:17.828449249267578 lr:[0.0001]
epoch:0 step:30 loss:4.975006580352783 lr:[0.0001]
epoch:0 step:40 loss:4.852877616882324 lr:[0.0001]
epoch:0 step:50 loss:4.268239974975586 lr:[0.0001]
epoch:0 step:60 loss:4.1609086990356445 lr:[0.0001]
epoch:0 step:70 loss:4.1336517333984375 lr:[0.0001]
epoch:0 step:80 loss:3.7844316959381104 lr:[0.0001]
epoch:0 step:90 loss:3.6845128536224365 lr:[0.0001]
epoch:0 step:100 loss:3.3444361686706543 lr:[0.0001]
epoch:0 step:110 loss:3.556560516357422 lr:[0.0001]
epoch:0 step:120 loss:3.6098265647888184 lr:[0.0001]
epoch:0 step:130 loss:3.576953649520874 lr:[0.0001]
epoch:0 step:140 loss:3.6649744510650635 lr:[0.0001]
epoch:0 step:150 loss:3.4827723503112793 lr:[0.0001]
epoch:0 step:160 loss:3.3926305770874023 lr:[0.0001]
epoch:0 step:170 loss:3.149014949798584 lr:[0.0001]
epoch:0 step:180 loss:3.546814203262329 lr:[0.0001]
epoch:0 step:190 loss:3.4127795696258545 lr:[0.0001]
epoch:0 step:200 loss:3.5992181301116943 lr:[0.0001]
epoch:0 step:210 loss:3.409351348876953 lr:[0.0001]
epoch:0 step:220 loss:3.337221145629883 lr:[0.0001]
epoch:0 step:230 loss:3.5305778980255127 lr:[0.0001]
epoch:0 step:240 loss:3.3767147064208984 lr:[0.0001]
epoch:0 step:250 loss:3.4931416511535645 lr:[0.0001]
epoch:0 step:260 loss:3.5262057781219482 lr:[0.0001]
epoch:0 step:270 loss:3.638775110244751 lr:[0.0001]
epoch:0 step:280 loss:3.215933322906494 lr:[0.0001]
epoch:0 step:290 loss:3.3103015422821045 lr:[0.0001]
epoch:0 step:300 loss:3.551041603088379 lr:[0.0001]
epoch:0 step:310 loss:3.4280881881713867 lr:[0.0001]
epoch:0 step:320 loss:3.3087568283081055 lr:[0.0001]
epoch:0 step:330 loss:3.2428324222564697 lr:[0.0001]
epoch:0 step:340 loss:3.5170891284942627 lr:[0.0001]
epoch:0 step:350 loss:3.8188509941101074 lr:[0.0001]
epoch:0 step:360 loss:3.282607316970825 lr:[0.0001]
epoch:0 step:370 loss:3.280571460723877 lr:[0.0001]
epoch:0 step:380 loss:3.373042583465576 lr:[0.0001]
epoch:0 step:390 loss:3.115152359008789 lr:[0.0001]
epoch:0 step:400 loss:3.1178078651428223 lr:[0.0001]
epoch:0 step:410 loss:3.530694007873535 lr:[0.0001]
epoch:0 step:420 loss:3.0471224784851074 lr:[0.0001]
epoch:0 step:430 loss:3.0546951293945312 lr:[0.0001]
epoch:0 step:440 loss:3.242072105407715 lr:[0.0001]
epoch:0 step:450 loss:3.4129416942596436 lr:[0.0001]
epoch:0 step:460 loss:3.2841014862060547 lr:[0.0001]
epoch:0 step:470 loss:3.3703696727752686 lr:[0.0001]
epoch:0 step:480 loss:3.3219523429870605 lr:[0.0001]
epoch:0 step:490 loss:3.260223627090454 lr:[0.0001]
epoch:0 step:500 loss:3.377300500869751 lr:[0.0001]
epoch:0 step:510 loss:3.2311880588531494 lr:[0.0001]
epoch:0 step:520 loss:3.132509231567383 lr:[0.0001]
epoch:0 step:530 loss:3.2282073497772217 lr:[0.0001]
epoch:0 step:540 loss:3.2032241821289062 lr:[0.0001]
epoch:0 step:550 loss:3.074181079864502 lr:[0.0001]
epoch:0 step:560 loss:3.300992012023926 lr:[0.0001]
epoch:0 step:570 loss:3.4088134765625 lr:[0.0001]
epoch:0 step:580 loss:3.079993486404419 lr:[0.0001]
epoch:0 step:590 loss:3.2780089378356934 lr:[0.0001]
epoch:0 step:600 loss:3.150749683380127 lr:[0.0001]
epoch:0 step:610 loss:3.1403372287750244 lr:[0.0001]
epoch:0 step:620 loss:2.963975191116333 lr:[0.0001]
epoch:0 step:630 loss:3.327617645263672 lr:[0.0001]
epoch:0 step:640 loss:3.357861280441284 lr:[0.0001]
epoch:0 step:650 loss:3.3536860942840576 lr:[0.0001]
epoch:0 step:660 loss:3.021942377090454 lr:[0.0001]
epoch:0 step:670 loss:3.1346914768218994 lr:[0.0001]
epoch:0 step:680 loss:2.9873874187469482 lr:[0.0001]
epoch:0 step:690 loss:3.111438035964966 lr:[0.0001]
epoch:0 step:700 loss:3.421874761581421 lr:[0.0001]
epoch:0 step:710 loss:3.04634165763855 lr:[0.0001]
epoch:0 step:720 loss:3.1103615760803223 lr:[0.0001]
epoch:0 step:730 loss:2.9594967365264893 lr:[0.0001]
epoch:0 step:740 loss:3.0390779972076416 lr:[0.0001]
epoch:0 step:750 loss:3.07448410987854 lr:[0.0001]
epoch:0 step:760 loss:3.0625946521759033 lr:[0.0001]
epoch:0 step:770 loss:3.3562490940093994 lr:[0.0001]
epoch:0 step:780 loss:3.3822007179260254 lr:[0.0001]
epoch:0 step:790 loss:3.1535747051239014 lr:[0.0001]
epoch:0 step:800 loss:3.18884015083313 lr:[0.0001]
epoch:0 step:810 loss:3.0545096397399902 lr:[0.0001]
epoch:0 step:820 loss:3.0073938369750977 lr:[0.0001]
epoch:0 step:830 loss:3.287209987640381 lr:[0.0001]
epoch:0 step:840 loss:3.109248161315918 lr:[0.0001]
epoch:0 step:850 loss:3.1145637035369873 lr:[0.0001]
epoch:0 step:860 loss:2.86651873588562 lr:[0.0001]
epoch:0 step:870 loss:2.7457683086395264 lr:[0.0001]
epoch:0 step:880 loss:3.2297887802124023 lr:[0.0001]
epoch:0 step:890 loss:3.0989131927490234 lr:[0.0001]
epoch:0 step:900 loss:3.200568199157715 lr:[0.0001]
epoch:0 step:910 loss:2.662416458129883 lr:[0.0001]
epoch:0 step:920 loss:2.9158096313476562 lr:[0.0001]
epoch:0 step:930 loss:3.160327434539795 lr:[0.0001]
epoch:0 step:940 loss:3.1418256759643555 lr:[0.0001]
epoch:0 step:950 loss:3.158902645111084 lr:[0.0001]
epoch:0 step:960 loss:3.278364896774292 lr:[0.0001]
epoch:0 step:970 loss:2.890455722808838 lr:[0.0001]
epoch:0 step:980 loss:3.228557825088501 lr:[0.0001]
epoch:0 step:990 loss:3.198761463165283 lr:[0.0001]
epoch:0 step:1000 loss:2.8755388259887695 lr:[0.0001]
epoch:0 step:1010 loss:3.3236026763916016 lr:[0.0001]
epoch:0 step:1020 loss:2.7903687953948975 lr:[0.0001]
epoch:0 step:1030 loss:2.91058349609375 lr:[0.0001]
epoch:0 step:1040 loss:2.9671144485473633 lr:[0.0001]
epoch:0 step:1050 loss:2.9978561401367188 lr:[0.0001]
epoch:0 step:1060 loss:2.693514347076416 lr:[0.0001]
epoch:0 step:1070 loss:2.7668089866638184 lr:[0.0001]
epoch:0 step:1080 loss:2.5095176696777344 lr:[0.0001]
epoch:0 step:1090 loss:3.131608247756958 lr:[0.0001]
epoch:0 step:1100 loss:2.795675039291382 lr:[0.0001]
epoch:0 step:1110 loss:3.352942943572998 lr:[0.0001]
epoch:0 step:1120 loss:2.893986225128174 lr:[0.0001]
epoch:0 step:1130 loss:2.65913462638855 lr:[0.0001]
epoch:0 step:1140 loss:2.691993236541748 lr:[0.0001]
epoch:0 step:1150 loss:2.5728373527526855 lr:[0.0001]
epoch:0 step:1160 loss:2.826155662536621 lr:[0.0001]
epoch:0 step:1170 loss:2.629695177078247 lr:[0.0001]
epoch:0 step:1180 loss:3.090916872024536 lr:[0.0001]
epoch:0 step:1190 loss:3.212348461151123 lr:[0.0001]
epoch:0 step:1200 loss:2.996096134185791 lr:[0.0001]
epoch:0 step:1210 loss:2.989936351776123 lr:[0.0001]
epoch:0 step:1220 loss:2.6193621158599854 lr:[0.0001]
epoch:0 step:1230 loss:2.8635053634643555 lr:[0.0001]
epoch:0 step:1240 loss:2.804800271987915 lr:[0.0001]
epoch:0 step:1250 loss:2.5032973289489746 lr:[0.0001]
epoch:0 step:1260 loss:2.9435880184173584 lr:[0.0001]
epoch:0 step:1270 loss:2.8636107444763184 lr:[0.0001]
epoch:0 step:1280 loss:2.8672049045562744 lr:[0.0001]
epoch:0 step:1290 loss:2.8131165504455566 lr:[0.0001]
epoch:0 step:1300 loss:2.5916292667388916 lr:[0.0001]
epoch:0 step:1310 loss:2.7277092933654785 lr:[0.0001]
epoch:0 step:1320 loss:2.6451048851013184 lr:[0.0001]
epoch:0 step:1330 loss:2.710581064224243 lr:[0.0001]
epoch:0 step:1340 loss:2.751511335372925 lr:[0.0001]
epoch:0 step:1350 loss:2.7526001930236816 lr:[0.0001]
epoch:0 step:1360 loss:2.916168212890625 lr:[0.0001]
epoch:0 step:1370 loss:2.823166847229004 lr:[0.0001]
epoch:0 step:1380 loss:3.0230510234832764 lr:[0.0001]
epoch:0 step:1390 loss:2.9068477153778076 lr:[0.0001]
epoch:0 step:1400 loss:2.504387378692627 lr:[0.0001]
epoch:0 step:1410 loss:2.6986453533172607 lr:[0.0001]
epoch:0 step:1420 loss:2.8052799701690674 lr:[0.0001]
epoch:0 step:1430 loss:2.5450215339660645 lr:[0.0001]
epoch:0 step:1440 loss:2.9992854595184326 lr:[0.0001]
epoch:0 step:1450 loss:2.737156629562378 lr:[0.0001]
epoch:0 step:1460 loss:2.6878278255462646 lr:[0.0001]
epoch:0 step:1470 loss:2.9638028144836426 lr:[0.0001]
epoch:0 step:1480 loss:2.5162625312805176 lr:[0.0001]
epoch:0 step:1490 loss:2.8292810916900635 lr:[0.0001]
epoch:0 step:1500 loss:2.6997060775756836 lr:[0.0001]
epoch:0 step:1510 loss:2.5786292552948 lr:[0.0001]
epoch:0 step:1520 loss:2.802767038345337 lr:[0.0001]
epoch:0 step:1530 loss:2.646484136581421 lr:[0.0001]
epoch:0 step:1540 loss:2.504781723022461 lr:[0.0001]
epoch:0 step:1550 loss:2.6350083351135254 lr:[0.0001]
epoch:0 step:1560 loss:2.503735065460205 lr:[0.0001]
epoch:0 step:1570 loss:2.6105294227600098 lr:[0.0001]
epoch:0 step:1580 loss:2.7309470176696777 lr:[0.0001]
epoch:0 step:1590 loss:2.484358072280884 lr:[0.0001]
epoch:0 step:1600 loss:2.364793539047241 lr:[0.0001]
epoch:0 step:1610 loss:2.5830798149108887 lr:[0.0001]
epoch:0 step:1620 loss:2.5444869995117188 lr:[0.0001]
epoch:0 step:1630 loss:2.4025800228118896 lr:[0.0001]
epoch:0 step:1640 loss:2.400402784347534 lr:[0.0001]
epoch:0 step:1650 loss:2.514221668243408 lr:[0.0001]
epoch:0 step:1660 loss:2.4697723388671875 lr:[0.0001]
epoch:0 step:1670 loss:2.3425967693328857 lr:[0.0001]
epoch:0 step:1680 loss:2.708733081817627 lr:[0.0001]
epoch:0 step:1690 loss:2.723647117614746 lr:[0.0001]
epoch:0 step:1700 loss:2.5626840591430664 lr:[0.0001]
epoch:0 step:1710 loss:2.480067491531372 lr:[0.0001]
epoch:0 step:1720 loss:2.1066274642944336 lr:[0.0001]
epoch:0 step:1730 loss:2.434966564178467 lr:[0.0001]
epoch:0 step:1740 loss:2.8200371265411377 lr:[0.0001]
epoch:0 step:1750 loss:2.448164701461792 lr:[0.0001]
epoch:0 step:1760 loss:2.705467939376831 lr:[0.0001]
epoch:0 step:1770 loss:2.107234001159668 lr:[0.0001]
epoch:0 step:1780 loss:2.20770001411438 lr:[0.0001]
epoch:0 step:1790 loss:2.672931432723999 lr:[0.0001]
epoch:0 step:1800 loss:2.2567243576049805 lr:[0.0001]
epoch:0 step:1810 loss:2.3538737297058105 lr:[0.0001]
epoch:0 step:1820 loss:2.3458662033081055 lr:[0.0001]
epoch:0 step:1830 loss:2.6216955184936523 lr:[0.0001]
epoch:0 step:1840 loss:2.896329879760742 lr:[0.0001]
epoch:0 step:1850 loss:1.9171963930130005 lr:[0.0001]
epoch:0 step:1860 loss:2.911867380142212 lr:[0.0001]
epoch:0 step:1870 loss:2.353778839111328 lr:[0.0001]
epoch:0 step:1880 loss:2.6749398708343506 lr:[0.0001]
epoch:0 step:1890 loss:2.4225828647613525 lr:[0.0001]
epoch:0 step:1900 loss:2.383204460144043 lr:[0.0001]
epoch:0 step:1910 loss:2.0943076610565186 lr:[0.0001]
epoch:0 step:1920 loss:2.2485828399658203 lr:[0.0001]
epoch:0 step:1930 loss:2.2033817768096924 lr:[0.0001]
epoch:0 step:1940 loss:2.4147608280181885 lr:[0.0001]
epoch:0 step:1950 loss:1.9699718952178955 lr:[0.0001]
epoch:0 step:1960 loss:1.9803935289382935 lr:[0.0001]
epoch:0 step:1970 loss:1.9231431484222412 lr:[0.0001]
epoch:0 step:1980 loss:2.5765674114227295 lr:[0.0001]
epoch:0 step:1990 loss:2.275883197784424 lr:[0.0001]
epoch:0 step:2000 loss:2.1890408992767334 lr:[0.0001]
epoch:0 step:2010 loss:2.256505012512207 lr:[0.0001]
epoch:0 step:2020 loss:2.397461414337158 lr:[0.0001]
epoch:0 step:2030 loss:2.197068929672241 lr:[0.0001]
epoch:0 step:2040 loss:2.350250720977783 lr:[0.0001]
epoch:0 step:2050 loss:1.8081855773925781 lr:[0.0001]
epoch:0 step:2060 loss:1.6994335651397705 lr:[0.0001]
epoch:0 step:2070 loss:2.162252426147461 lr:[0.0001]
epoch:0 step:2080 loss:1.9320565462112427 lr:[0.0001]
epoch:0 step:2090 loss:2.183896541595459 lr:[0.0001]
epoch:0 step:2100 loss:2.151423215866089 lr:[0.0001]
epoch:0 step:2110 loss:1.9156275987625122 lr:[0.0001]
epoch:0 step:2120 loss:2.2778403759002686 lr:[0.0001]
epoch:0 step:2130 loss:1.9177744388580322 lr:[0.0001]
epoch:0 step:2140 loss:2.0441112518310547 lr:[0.0001]