-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathptb_train_until.log
968 lines (968 loc) · 122 KB
/
ptb_train_until.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
Looking in indexes: https://mirror.baidu.com/pypi/simple/
Collecting progress
Downloading https://mirror.baidu.com/pypi/packages/2a/68/d8412d1e0d70edf9791cbac5426dc859f4649afc22f2abbeb0d947cf70fd/progress-1.6.tar.gz
Building wheels for collected packages: progress
Building wheel for progress (setup.py) ... [?25l- done
[?25h Created wheel for progress: filename=progress-1.6-cp37-none-any.whl size=9618 sha256=a88f9c7b6e5017845a051ea35fecb977d22588c9bb3aa7f83e66afd528e6f7be
Stored in directory: /home/aistudio/.cache/pip/wheels/93/3b/ad/174fee3e8ee80a61b3907cb370ae9cfe2138e215cf56583160
Successfully built progress
Installing collected packages: progress
Successfully installed progress-1.6
Read 887521 words from data/ptb.train.txt
Read 70390 words from data/ptb.valid.txt
Read 78669 words from data/ptb.test.txt
vacab size is 10000
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 858.5256851359268, 'validate_perplexity': 485.6423697966673}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 380.9968034160644, 'validate_perplexity': 307.7869390667442}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 266.60145660608424, 'validate_perplexity': 245.2216404557344}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 213.0288886475339, 'validate_perplexity': 213.75132952324321}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 180.9682894132847, 'validate_perplexity': 194.9516946566187}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 158.27800136033852, 'validate_perplexity': 179.07397289995978}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 141.88218961939378, 'validate_perplexity': 164.82615298821102}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 129.87957383597282, 'validate_perplexity': 162.4238837110198}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 119.40149047967704, 'validate_perplexity': 152.63361999322714}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 112.19449382448678, 'validate_perplexity': 150.4844488335424}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 104.6746004561156, 'validate_perplexity': 143.08542775175906}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 99.3181754838364, 'validate_perplexity': 139.19706662625978}
{'epoch': 13, 'learning_rate': 0.01, 'train_perplexity': 94.52166605781541, 'validate_perplexity': 135.56910704806782}
{'epoch': 14, 'learning_rate': 0.01, 'train_perplexity': 90.04786978288345, 'validate_perplexity': 141.46705998334102}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 79.12907647429027, 'validate_perplexity': 129.41579443933384}
{'epoch': 16, 'learning_rate': 0.006666666666666667, 'train_perplexity': 76.24257258210055, 'validate_perplexity': 133.04811512923}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 69.63647645126863, 'validate_perplexity': 128.2180827852778}
{'epoch': 18, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 68.12469000625806, 'validate_perplexity': 124.07422015940651}
{'epoch': 19, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 66.19661556966655, 'validate_perplexity': 124.15127456074333}
{'epoch': 20, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.88188276598271, 'validate_perplexity': 124.5076358753576}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 59.124627172763425, 'validate_perplexity': 123.89916154151922}
{'epoch': 22, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 58.237024525990094, 'validate_perplexity': 122.58287672019033}
{'epoch': 23, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 57.70746511801446, 'validate_perplexity': 126.87847906986356}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 56.00883003224843, 'validate_perplexity': 123.05285322342198}
{'epoch': 25, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 55.200079506420806, 'validate_perplexity': 122.88204818577597}
{'epoch': 26, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 54.50588397032268, 'validate_perplexity': 123.44302159132897}
{'epoch': 27, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 53.51555517187641, 'validate_perplexity': 123.53900430549201}
{'epoch': 28, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 52.87012982375218, 'validate_perplexity': 122.56446570069404}
{'epoch': 29, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 52.65254626976703, 'validate_perplexity': 125.11008990149888}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 51.94357946284327, 'validate_perplexity': 122.97343132703352}
{'epoch': 31, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 51.68380148429554, 'validate_perplexity': 124.56220873978}
{'epoch': 32, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 51.476831374177394, 'validate_perplexity': 123.60759223593026}
{'epoch': 33, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 51.33586032881596, 'validate_perplexity': 124.63106760168624}
{'epoch': 34, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 51.18552374834823, 'validate_perplexity': 123.6631858075374}
{'epoch': 35, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 51.08220282932827, 'validate_perplexity': 123.70429277808296}
{'epoch': 36, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 50.56605379415995, 'validate_perplexity': 123.81653656993727}
{'epoch': 37, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 50.80752926965467, 'validate_perplexity': 123.79634642497004}
{'epoch': 38, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 50.82169187669877, 'validate_perplexity': 123.76712967766183}
{'epoch': 39, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 50.633865361995696, 'validate_perplexity': 123.51344088404484}
{'epoch': 40, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 50.79649510691359, 'validate_perplexity': 123.66141680412748}
{'epoch': 41, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 50.76433892960894, 'validate_perplexity': 123.41147547313956}
{'epoch': 42, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 50.56102673930797, 'validate_perplexity': 123.76642147849127}
{'epoch': 43, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 50.45991681036378, 'validate_perplexity': 123.7057084694658}
{'epoch': 44, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 50.535886911874215, 'validate_perplexity': 123.71473388289867}
{'epoch': 45, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 50.57848494112867, 'validate_perplexity': 123.63842206247786}
{'epoch': 46, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 50.20134660023084, 'validate_perplexity': 123.78442279822663}
Perplexity on Test: 118.514056
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 846.7895656603342, 'validate_perplexity': 451.49253624669177}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 336.86511503687774, 'validate_perplexity': 270.3097076533725}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 231.83399038676018, 'validate_perplexity': 214.1765735954655}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 183.97096504190569, 'validate_perplexity': 188.55583839461423}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 158.24087308219956, 'validate_perplexity': 168.66860731664184}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 139.97508243106995, 'validate_perplexity': 154.95750248265128}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 126.86432277228249, 'validate_perplexity': 149.79294573149247}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 117.40559181875861, 'validate_perplexity': 150.8552405911777}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 103.35614398865272, 'validate_perplexity': 137.90653009793212}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 97.31409129480946, 'validate_perplexity': 134.14521674640716}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 93.47619939439814, 'validate_perplexity': 130.8925521221059}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 89.53425386916251, 'validate_perplexity': 132.50533459778802}
{'epoch': 13, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 82.60613552005407, 'validate_perplexity': 125.86085165142136}
{'epoch': 14, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 79.81855321138339, 'validate_perplexity': 123.20115853426633}
{'epoch': 15, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 77.50849732422523, 'validate_perplexity': 124.74902998035529}
{'epoch': 16, 'learning_rate': 0.002962962962962963, 'train_perplexity': 73.1007165547244, 'validate_perplexity': 122.55020636710327}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 71.73821608100027, 'validate_perplexity': 121.75844785115441}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 69.96438428784688, 'validate_perplexity': 120.61463538691159}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 68.87068827251642, 'validate_perplexity': 121.9008335319948}
{'epoch': 20, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 65.94397910421156, 'validate_perplexity': 118.44964958452074}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 65.14305347244272, 'validate_perplexity': 118.67403571056688}
{'epoch': 22, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 63.55217301249654, 'validate_perplexity': 118.3121976894404}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 62.80982098637391, 'validate_perplexity': 118.9050268623492}
{'epoch': 24, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 61.69238075565664, 'validate_perplexity': 117.2856807790694}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 60.99937911067042, 'validate_perplexity': 118.67833649093124}
{'epoch': 26, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 60.14268015437, 'validate_perplexity': 116.89869632579813}
{'epoch': 27, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 59.87916102001007, 'validate_perplexity': 117.40743928380316}
{'epoch': 28, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 59.291453458489116, 'validate_perplexity': 117.03149254654763}
{'epoch': 29, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 59.39632348862972, 'validate_perplexity': 117.49598372268602}
{'epoch': 30, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 58.74152105075097, 'validate_perplexity': 116.94067731870246}
{'epoch': 31, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 58.76886530977888, 'validate_perplexity': 117.5504540551727}
{'epoch': 32, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 58.325120651081754, 'validate_perplexity': 116.84508516317153}
{'epoch': 33, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 58.390180395340494, 'validate_perplexity': 117.25218578654676}
{'epoch': 34, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 58.28389065045379, 'validate_perplexity': 117.04717479214159}
{'epoch': 35, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 58.21250915068171, 'validate_perplexity': 116.8936796856505}
{'epoch': 36, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 57.82304230255268, 'validate_perplexity': 117.45189917810534}
{'epoch': 37, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 58.060179215362865, 'validate_perplexity': 117.05610512355494}
{'epoch': 38, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 57.774866220799844, 'validate_perplexity': 117.06676659884812}
{'epoch': 39, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 58.05489157324028, 'validate_perplexity': 117.1928241867984}
{'epoch': 40, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 57.85906277802432, 'validate_perplexity': 117.11751959930334}
{'epoch': 41, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 57.46752123821686, 'validate_perplexity': 116.90471657816612}
{'epoch': 42, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 57.879372104005185, 'validate_perplexity': 116.97754158543553}
{'epoch': 43, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 57.6038189769704, 'validate_perplexity': 116.95205325068034}
{'epoch': 44, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 57.783324445208734, 'validate_perplexity': 117.02267569432027}
{'epoch': 45, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 57.83015636133756, 'validate_perplexity': 117.12662284866242}
Perplexity on Test: 112.107272
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 833.8779872154358, 'validate_perplexity': 457.5701971302307}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 345.14136485340237, 'validate_perplexity': 271.8148499253912}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 231.48790502233686, 'validate_perplexity': 213.6902854201905}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 186.9617596935319, 'validate_perplexity': 192.9517590630761}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 162.26464769580548, 'validate_perplexity': 172.9215857413614}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 144.50757498979067, 'validate_perplexity': 163.33662811790478}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 133.1395033918858, 'validate_perplexity': 163.87265339459546}
{'epoch': 8, 'learning_rate': 0.006666666666666667, 'train_perplexity': 115.09189277905291, 'validate_perplexity': 143.09709529216943}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 109.01145626355084, 'validate_perplexity': 138.41142876045868}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 103.07639273065523, 'validate_perplexity': 139.4940098018714}
{'epoch': 11, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 94.12483606332985, 'validate_perplexity': 131.99176627215914}
{'epoch': 12, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 91.06042041210556, 'validate_perplexity': 126.7598932897304}
{'epoch': 13, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 87.90530761063154, 'validate_perplexity': 127.7958799656974}
{'epoch': 14, 'learning_rate': 0.002962962962962963, 'train_perplexity': 81.985999924607, 'validate_perplexity': 123.395587758043}
{'epoch': 15, 'learning_rate': 0.002962962962962963, 'train_perplexity': 80.2481134135729, 'validate_perplexity': 122.94716417368333}
{'epoch': 16, 'learning_rate': 0.002962962962962963, 'train_perplexity': 78.09990452557804, 'validate_perplexity': 125.39844636753772}
{'epoch': 17, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 75.04497329986023, 'validate_perplexity': 120.87349364140732}
{'epoch': 18, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 73.75563566832555, 'validate_perplexity': 121.13146343884742}
{'epoch': 19, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 71.37527269004303, 'validate_perplexity': 121.79990900063532}
{'epoch': 20, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 69.72838279491788, 'validate_perplexity': 120.43452256713607}
{'epoch': 21, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 69.3741521611315, 'validate_perplexity': 120.21190925890976}
{'epoch': 22, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 68.53248020592349, 'validate_perplexity': 119.84812532435338}
{'epoch': 23, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 67.92096083112875, 'validate_perplexity': 119.8378962618348}
{'epoch': 24, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 67.19774525504339, 'validate_perplexity': 119.64525024427789}
{'epoch': 25, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 66.72229214095897, 'validate_perplexity': 119.63846133206029}
{'epoch': 26, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 66.00145330965152, 'validate_perplexity': 118.51298192848083}
{'epoch': 27, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 65.41863218266467, 'validate_perplexity': 119.20374810824852}
{'epoch': 28, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 65.13954348998718, 'validate_perplexity': 119.05043353031512}
{'epoch': 29, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 65.49363441001944, 'validate_perplexity': 118.42259818190738}
{'epoch': 30, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 64.72773998668156, 'validate_perplexity': 119.0062764739217}
{'epoch': 31, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 64.74511908443394, 'validate_perplexity': 118.33702318058288}
{'epoch': 32, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 64.40195351118192, 'validate_perplexity': 118.78970171312399}
{'epoch': 33, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 64.45620887782474, 'validate_perplexity': 118.98426082407651}
{'epoch': 34, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 64.09672291621084, 'validate_perplexity': 118.92595038872669}
{'epoch': 35, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 63.97644549662784, 'validate_perplexity': 118.8847305907958}
{'epoch': 36, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 63.772044626873296, 'validate_perplexity': 118.51179519515878}
{'epoch': 37, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 63.82832587642575, 'validate_perplexity': 118.34819635101412}
{'epoch': 38, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 63.96625718824428, 'validate_perplexity': 118.58709152715979}
{'epoch': 39, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 63.91134758892835, 'validate_perplexity': 118.60909024600448}
{'epoch': 40, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 63.929208613176236, 'validate_perplexity': 118.55639062658365}
{'epoch': 41, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 63.57505667981799, 'validate_perplexity': 118.58771354284005}
{'epoch': 42, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 64.15154706201996, 'validate_perplexity': 118.50738743257342}
{'epoch': 43, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 63.96869735385033, 'validate_perplexity': 118.44309794717955}
{'epoch': 44, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 63.88940913526891, 'validate_perplexity': 118.50755695887223}
Perplexity on Test: 113.338575
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 861.8566806321298, 'validate_perplexity': 444.8218624743826}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 349.2039540271599, 'validate_perplexity': 270.7205460253057}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 235.58424029172738, 'validate_perplexity': 212.9386030853811}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 188.8133377114001, 'validate_perplexity': 189.3916947373114}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 159.33390099239273, 'validate_perplexity': 169.71385086228594}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 141.51266809723217, 'validate_perplexity': 158.82408149193247}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 127.98871070053814, 'validate_perplexity': 154.55635931070861}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 118.85621961558262, 'validate_perplexity': 147.48763965531552}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 109.15876508355096, 'validate_perplexity': 139.77219035300465}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 103.24866211651077, 'validate_perplexity': 137.23813708223682}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 99.46082720755201, 'validate_perplexity': 140.09841534087988}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 87.39453554437381, 'validate_perplexity': 131.36970424797937}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 83.23711357962505, 'validate_perplexity': 127.8693311439885}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 80.41217108782513, 'validate_perplexity': 130.52843440062938}
{'epoch': 15, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 74.68470637328694, 'validate_perplexity': 123.31776751463654}
{'epoch': 16, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 72.82721750780313, 'validate_perplexity': 122.20340990978802}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 71.02794877958736, 'validate_perplexity': 122.44477387769793}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 67.0793249512401, 'validate_perplexity': 119.1186878285339}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 65.89261873787287, 'validate_perplexity': 121.29890973730532}
{'epoch': 20, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 64.04787018773753, 'validate_perplexity': 118.46365774896238}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 62.540520004647554, 'validate_perplexity': 120.09147981899463}
{'epoch': 22, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 61.761844471398035, 'validate_perplexity': 118.18409073421154}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 60.893478951068104, 'validate_perplexity': 118.57776168345758}
{'epoch': 24, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 59.873850470244825, 'validate_perplexity': 117.89174729036047}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 59.05660886214797, 'validate_perplexity': 117.27483160376646}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 59.13035059206434, 'validate_perplexity': 117.17382588318253}
{'epoch': 27, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 59.0972864616035, 'validate_perplexity': 117.93447855869098}
{'epoch': 28, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 57.98204868355881, 'validate_perplexity': 117.41231001281704}
{'epoch': 29, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 57.902918869532634, 'validate_perplexity': 117.68859227894544}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 57.35801510653571, 'validate_perplexity': 118.45733127602087}
{'epoch': 31, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 57.06480465376028, 'validate_perplexity': 117.57590461059677}
{'epoch': 32, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.766866965539904, 'validate_perplexity': 117.43689068191698}
{'epoch': 33, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.77214557849455, 'validate_perplexity': 117.80863437102305}
{'epoch': 34, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 56.70704993581823, 'validate_perplexity': 117.48595541594425}
{'epoch': 35, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 56.70169626290119, 'validate_perplexity': 117.90731991969831}
{'epoch': 36, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 56.20689200572381, 'validate_perplexity': 117.4841067154089}
{'epoch': 37, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 56.42701409341152, 'validate_perplexity': 117.2897075324847}
{'epoch': 38, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 56.61122001797986, 'validate_perplexity': 117.45318731005504}
{'epoch': 39, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 56.20780326528896, 'validate_perplexity': 117.46679756726608}
{'epoch': 40, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 56.208098087723116, 'validate_perplexity': 117.2800882953336}
{'epoch': 41, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 56.255611496748074, 'validate_perplexity': 117.28238118141154}
{'epoch': 42, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 56.26797907311923, 'validate_perplexity': 117.26286512087549}
{'epoch': 43, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 55.82248485165524, 'validate_perplexity': 117.42989110637004}
{'epoch': 44, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 56.084916755598236, 'validate_perplexity': 117.24994939991213}
{'epoch': 45, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 55.968330024523894, 'validate_perplexity': 117.36489897792019}
{'epoch': 46, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 55.97612339866815, 'validate_perplexity': 117.36831282820839}
Perplexity on Test: 111.688952
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 766.9764176591682, 'validate_perplexity': 413.7951624435435}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 314.1109673344676, 'validate_perplexity': 254.965721910836}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 220.1449417813977, 'validate_perplexity': 208.9023608966356}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 180.35071533601615, 'validate_perplexity': 184.89499595266565}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 157.22812449274608, 'validate_perplexity': 175.24381388166634}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 140.58137127848198, 'validate_perplexity': 159.81456699751416}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 129.04489290019706, 'validate_perplexity': 152.5539451827519}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 119.4133905017269, 'validate_perplexity': 158.1054136838567}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 104.40253092111327, 'validate_perplexity': 137.25646157362073}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 99.72643831149864, 'validate_perplexity': 139.94178047824707}
{'epoch': 11, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 90.58814345612586, 'validate_perplexity': 130.3147500846144}
{'epoch': 12, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 86.65986281479009, 'validate_perplexity': 128.71052361400186}
{'epoch': 13, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 83.89463069909789, 'validate_perplexity': 131.1126854839379}
{'epoch': 14, 'learning_rate': 0.002962962962962963, 'train_perplexity': 78.74375843300268, 'validate_perplexity': 124.01430231168331}
{'epoch': 15, 'learning_rate': 0.002962962962962963, 'train_perplexity': 76.70254015615289, 'validate_perplexity': 126.39389133080208}
{'epoch': 16, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 73.26175937704703, 'validate_perplexity': 121.67481363770594}
{'epoch': 17, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 72.21713835385238, 'validate_perplexity': 120.02833414603005}
{'epoch': 18, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 70.78883227177552, 'validate_perplexity': 120.84963428962013}
{'epoch': 19, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 68.57723213366343, 'validate_perplexity': 120.33567273340618}
{'epoch': 20, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 67.65487167303725, 'validate_perplexity': 121.38905767027123}
{'epoch': 21, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 66.14326065820178, 'validate_perplexity': 118.7404888442807}
{'epoch': 22, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 65.66324407883783, 'validate_perplexity': 119.90608744798935}
{'epoch': 23, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 64.22760787274925, 'validate_perplexity': 119.12493601622295}
{'epoch': 24, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 64.00940083581742, 'validate_perplexity': 119.05213657260842}
{'epoch': 25, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 63.8487819558442, 'validate_perplexity': 118.65213810157034}
{'epoch': 26, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 63.49219933425079, 'validate_perplexity': 120.75395606920318}
{'epoch': 27, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 62.90000538947688, 'validate_perplexity': 119.4096324719048}
{'epoch': 28, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 62.70879093649188, 'validate_perplexity': 119.80241561174695}
{'epoch': 29, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.08516908436805, 'validate_perplexity': 119.93193363966071}
{'epoch': 30, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.77353736405489, 'validate_perplexity': 119.57241790957028}
{'epoch': 31, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.88993885997498, 'validate_perplexity': 119.09091576059907}
{'epoch': 32, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.82100907915437, 'validate_perplexity': 119.45929356001353}
{'epoch': 33, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.234532360145465, 'validate_perplexity': 119.34456898449204}
{'epoch': 34, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.25842171942097, 'validate_perplexity': 119.44687635205867}
{'epoch': 35, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.20177995613705, 'validate_perplexity': 119.49717971547501}
{'epoch': 36, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 60.973032188053956, 'validate_perplexity': 119.05594012169242}
{'epoch': 37, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 61.07085806599666, 'validate_perplexity': 119.29615016477622}
{'epoch': 38, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 61.17846695448081, 'validate_perplexity': 119.22921549623199}
{'epoch': 39, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.94800443349524, 'validate_perplexity': 119.09562917269959}
{'epoch': 40, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 61.07912894866495, 'validate_perplexity': 119.16953487159032}
{'epoch': 41, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 60.745921462216074, 'validate_perplexity': 119.21113763822468}
{'epoch': 42, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 61.31470658301636, 'validate_perplexity': 119.25480205540521}
{'epoch': 43, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 60.6188652012607, 'validate_perplexity': 119.30041660385783}
Perplexity on Test: 113.759060
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 821.6536613621176, 'validate_perplexity': 429.2190406518585}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 335.09642153452387, 'validate_perplexity': 261.71783062997446}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 229.62775500802493, 'validate_perplexity': 214.82985527062078}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 185.59077599022612, 'validate_perplexity': 193.83667712283255}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 161.77352709513923, 'validate_perplexity': 166.87144734741668}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 144.48876471742398, 'validate_perplexity': 163.14957771174215}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 132.20668749040948, 'validate_perplexity': 150.56131975447477}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 122.40233445366613, 'validate_perplexity': 150.93532369678198}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 107.54900106105885, 'validate_perplexity': 138.61142023485587}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 101.10787835156658, 'validate_perplexity': 136.70824601229884}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 97.37944862081731, 'validate_perplexity': 131.9192180359697}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 93.15227106193642, 'validate_perplexity': 133.8796396831403}
{'epoch': 13, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 84.78367852479657, 'validate_perplexity': 125.5839468170042}
{'epoch': 14, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 82.42836452805332, 'validate_perplexity': 129.98415613437268}
{'epoch': 15, 'learning_rate': 0.002962962962962963, 'train_perplexity': 77.35102765907673, 'validate_perplexity': 124.16785165082453}
{'epoch': 16, 'learning_rate': 0.002962962962962963, 'train_perplexity': 75.87463880809018, 'validate_perplexity': 122.74829068071254}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 73.64131820473443, 'validate_perplexity': 123.1649170344777}
{'epoch': 18, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 70.36201277768195, 'validate_perplexity': 120.50155929129178}
{'epoch': 19, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 69.59073463332419, 'validate_perplexity': 121.12591860741117}
{'epoch': 20, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 67.23293700217154, 'validate_perplexity': 119.75552414512256}
{'epoch': 21, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 66.84699869320795, 'validate_perplexity': 120.61975420058606}
{'epoch': 22, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 64.97952794696994, 'validate_perplexity': 119.47518719100242}
{'epoch': 23, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 64.76095880840906, 'validate_perplexity': 120.33452512847407}
{'epoch': 24, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 63.86961005494263, 'validate_perplexity': 118.90258885893417}
{'epoch': 25, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 63.68662252591834, 'validate_perplexity': 119.42477919312904}
{'epoch': 26, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 63.2442143480275, 'validate_perplexity': 118.63426088000121}
{'epoch': 27, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 62.31758400686054, 'validate_perplexity': 119.13055966535951}
{'epoch': 28, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.310720141430394, 'validate_perplexity': 119.02818268314621}
{'epoch': 29, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.082327115919135, 'validate_perplexity': 118.65739994883631}
{'epoch': 30, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.19859866191822, 'validate_perplexity': 119.63115940271885}
{'epoch': 31, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.4098594877389, 'validate_perplexity': 118.92339854205524}
{'epoch': 32, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.60589597280706, 'validate_perplexity': 118.68065671348094}
{'epoch': 33, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.410064465601174, 'validate_perplexity': 118.88909569752677}
{'epoch': 34, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.048904888478816, 'validate_perplexity': 118.90519695747618}
{'epoch': 35, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 60.93794971642175, 'validate_perplexity': 118.7184091452534}
{'epoch': 36, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.055367736307055, 'validate_perplexity': 118.88144269415082}
{'epoch': 37, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 61.274694012764904, 'validate_perplexity': 118.95243611938433}
{'epoch': 38, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.77587960062318, 'validate_perplexity': 118.91052672804662}
{'epoch': 39, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 61.215731155205575, 'validate_perplexity': 118.81009506721347}
{'epoch': 40, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 60.83058984393404, 'validate_perplexity': 118.90803191208197}
{'epoch': 41, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 61.09025563494756, 'validate_perplexity': 118.82652559296497}
{'epoch': 42, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 60.59227813220743, 'validate_perplexity': 118.76138341716774}
{'epoch': 43, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 60.7196550599266, 'validate_perplexity': 118.81949984873036}
{'epoch': 44, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 60.555133574594784, 'validate_perplexity': 118.81372091889754}
Perplexity on Test: 113.456831
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 854.6360264593039, 'validate_perplexity': 458.36268131507785}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 351.5908855330578, 'validate_perplexity': 276.0647692801304}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 234.03084408642013, 'validate_perplexity': 217.97197779424263}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 187.71248212912877, 'validate_perplexity': 197.9883138142366}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 160.98730822972277, 'validate_perplexity': 170.80580166693142}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 143.87680666723813, 'validate_perplexity': 167.87649327388297}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 130.5032293073712, 'validate_perplexity': 154.5947608506961}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 120.40874030990848, 'validate_perplexity': 147.2113004601205}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 112.59135047761178, 'validate_perplexity': 143.3128089639649}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 105.26994923881813, 'validate_perplexity': 140.13623158147553}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 99.9606273988635, 'validate_perplexity': 134.0013078240073}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 95.06330312196013, 'validate_perplexity': 133.95198857044807}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 85.14530797361373, 'validate_perplexity': 126.56487034904944}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 81.90687284140927, 'validate_perplexity': 129.77489003751174}
{'epoch': 15, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 75.06859460961806, 'validate_perplexity': 121.85219106857716}
{'epoch': 16, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 73.55379365579505, 'validate_perplexity': 122.15860762807905}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 69.25744434700344, 'validate_perplexity': 121.68090579281316}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 67.93689722149419, 'validate_perplexity': 120.02661713921833}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 67.0386831706002, 'validate_perplexity': 121.813209754262}
{'epoch': 20, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 64.41411552022677, 'validate_perplexity': 119.28938106119949}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 63.71921588625512, 'validate_perplexity': 119.29814115069328}
{'epoch': 22, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 61.74656163485543, 'validate_perplexity': 118.41226493474102}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 60.926124470078555, 'validate_perplexity': 117.16600394748971}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 60.6214378299896, 'validate_perplexity': 118.44371920758145}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 59.728279578768685, 'validate_perplexity': 117.8371188099862}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 59.713499943516766, 'validate_perplexity': 117.70907723882367}
{'epoch': 27, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 58.99843002112517, 'validate_perplexity': 118.22997225468714}
{'epoch': 28, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 58.13117989064116, 'validate_perplexity': 117.46657351734402}
{'epoch': 29, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 57.855807326185655, 'validate_perplexity': 117.4582839706412}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 57.55126989497688, 'validate_perplexity': 116.68957373484112}
{'epoch': 31, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 57.04556998834073, 'validate_perplexity': 118.04919858847386}
{'epoch': 32, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.77366157857476, 'validate_perplexity': 116.81004500495362}
{'epoch': 33, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.9846984476092, 'validate_perplexity': 117.34061313886438}
{'epoch': 34, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 56.6267978911911, 'validate_perplexity': 117.43106700590434}
{'epoch': 35, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 56.41081870679213, 'validate_perplexity': 117.03428282802655}
{'epoch': 36, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 56.45904191090894, 'validate_perplexity': 116.9977354126028}
{'epoch': 37, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 56.07948811376143, 'validate_perplexity': 116.98674552185807}
{'epoch': 38, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 56.08924934925381, 'validate_perplexity': 116.86598056069468}
{'epoch': 39, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 56.217694062047315, 'validate_perplexity': 116.97252156168904}
{'epoch': 40, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 56.108589634741456, 'validate_perplexity': 116.90438211156757}
{'epoch': 41, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 56.40377166808445, 'validate_perplexity': 117.17801642067597}
{'epoch': 42, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 55.868926361362135, 'validate_perplexity': 116.96694401019235}
{'epoch': 43, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 55.930606223421165, 'validate_perplexity': 116.96298411007663}
{'epoch': 44, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 56.34944221851919, 'validate_perplexity': 116.92060484389572}
{'epoch': 45, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 56.50167515986577, 'validate_perplexity': 116.91798452507766}
Perplexity on Test: 111.218670
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 807.3086294478112, 'validate_perplexity': 435.52196813316425}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 329.90091843581826, 'validate_perplexity': 256.49339868572696}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 225.67598696744236, 'validate_perplexity': 212.57277513113635}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 181.99565807248075, 'validate_perplexity': 181.3060938982464}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 158.28169957232913, 'validate_perplexity': 176.9723388248375}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 140.69490637565337, 'validate_perplexity': 161.7049644981696}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 129.4399871220952, 'validate_perplexity': 155.12872508121566}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 119.17373995599417, 'validate_perplexity': 154.7463225580827}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 112.30277351442085, 'validate_perplexity': 141.49229105791474}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 106.21125356539679, 'validate_perplexity': 139.93470733626643}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 100.82623119989053, 'validate_perplexity': 137.08025522009723}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 96.17080889182859, 'validate_perplexity': 133.1746792056325}
{'epoch': 13, 'learning_rate': 0.01, 'train_perplexity': 92.75601519630806, 'validate_perplexity': 134.21694115451967}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 82.54006608123834, 'validate_perplexity': 126.90964058819105}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 79.26434685632167, 'validate_perplexity': 125.82820765434947}
{'epoch': 16, 'learning_rate': 0.006666666666666667, 'train_perplexity': 76.82659175441906, 'validate_perplexity': 131.05755496677244}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 71.08870144280083, 'validate_perplexity': 124.20397368975866}
{'epoch': 18, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 69.1496701978856, 'validate_perplexity': 123.75326151458415}
{'epoch': 19, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 68.00612784388996, 'validate_perplexity': 121.87741066324033}
{'epoch': 20, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 66.43637459429904, 'validate_perplexity': 120.65352080679884}
{'epoch': 21, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 65.40618694677447, 'validate_perplexity': 122.94845394752868}
{'epoch': 22, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.478448101772216, 'validate_perplexity': 121.99561712292045}
{'epoch': 23, 'learning_rate': 0.002962962962962963, 'train_perplexity': 60.8658426919382, 'validate_perplexity': 121.33234576502115}
{'epoch': 24, 'learning_rate': 0.002962962962962963, 'train_perplexity': 59.585960647963326, 'validate_perplexity': 120.91603721286256}
{'epoch': 25, 'learning_rate': 0.002962962962962963, 'train_perplexity': 59.16701616801296, 'validate_perplexity': 121.94711128433308}
{'epoch': 26, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 56.62531281378889, 'validate_perplexity': 122.91715143999285}
{'epoch': 27, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 54.997697294654174, 'validate_perplexity': 119.98616012133978}
{'epoch': 28, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 54.119831264846475, 'validate_perplexity': 119.09085897363877}
{'epoch': 29, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 54.095191814474056, 'validate_perplexity': 120.86340759192419}
{'epoch': 30, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 52.80349023634799, 'validate_perplexity': 119.20266813838734}
{'epoch': 31, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 52.94369460644942, 'validate_perplexity': 120.48351832091176}
{'epoch': 32, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 51.948644894405746, 'validate_perplexity': 120.12864995806444}
{'epoch': 33, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 51.782363471465416, 'validate_perplexity': 119.89333794966655}
{'epoch': 34, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 51.6998846901587, 'validate_perplexity': 120.66215092773064}
{'epoch': 35, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 51.246907922262196, 'validate_perplexity': 119.75198375666031}
{'epoch': 36, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 51.31817740894846, 'validate_perplexity': 121.28155903013052}
{'epoch': 37, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 50.759316366042576, 'validate_perplexity': 120.31409959179511}
{'epoch': 38, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 50.452265940504724, 'validate_perplexity': 120.41143886214041}
{'epoch': 39, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 50.259681142879415, 'validate_perplexity': 120.49035518608405}
{'epoch': 40, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 50.27119797313395, 'validate_perplexity': 120.57185290252198}
{'epoch': 41, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 50.104217036094504, 'validate_perplexity': 120.68827518887838}
{'epoch': 42, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 50.26405507043626, 'validate_perplexity': 120.52897064780893}
{'epoch': 43, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 50.094159702037395, 'validate_perplexity': 120.49673277978206}
{'epoch': 44, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 49.84379733279477, 'validate_perplexity': 120.80751731954888}
{'epoch': 45, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 50.00142171306891, 'validate_perplexity': 120.7042747751725}
{'epoch': 46, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 50.174196434006554, 'validate_perplexity': 120.51822373358443}
{'epoch': 47, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 50.124170454456404, 'validate_perplexity': 120.68482231902215}
{'epoch': 48, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 50.08218192572768, 'validate_perplexity': 120.71463535079981}
{'epoch': 49, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 49.98157674114506, 'validate_perplexity': 120.61670589212196}
{'epoch': 50, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 50.1620679650633, 'validate_perplexity': 120.648745738487}
Perplexity on Test: 114.602473
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 828.0730945952289, 'validate_perplexity': 453.18506899725475}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 353.7548585206133, 'validate_perplexity': 282.38730551312466}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 235.44846603068868, 'validate_perplexity': 211.98234858119739}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 188.15544341563407, 'validate_perplexity': 195.0798359238712}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 163.1120844541071, 'validate_perplexity': 174.19067419755626}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 144.95630083631843, 'validate_perplexity': 176.4616076859581}
{'epoch': 7, 'learning_rate': 0.006666666666666667, 'train_perplexity': 124.69913206995852, 'validate_perplexity': 149.65479853375822}
{'epoch': 8, 'learning_rate': 0.006666666666666667, 'train_perplexity': 115.59580411148234, 'validate_perplexity': 143.32169303275145}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 108.53816268707837, 'validate_perplexity': 145.84527937636435}
{'epoch': 10, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 99.14967462066305, 'validate_perplexity': 131.96080412348346}
{'epoch': 11, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 94.19339616958702, 'validate_perplexity': 133.1155078984749}
{'epoch': 12, 'learning_rate': 0.002962962962962963, 'train_perplexity': 87.96908575846928, 'validate_perplexity': 128.71481986292463}
{'epoch': 13, 'learning_rate': 0.002962962962962963, 'train_perplexity': 85.80014059608656, 'validate_perplexity': 128.31839020082433}
{'epoch': 14, 'learning_rate': 0.002962962962962963, 'train_perplexity': 83.64928510771303, 'validate_perplexity': 127.80112072603723}
{'epoch': 15, 'learning_rate': 0.002962962962962963, 'train_perplexity': 81.25044334680295, 'validate_perplexity': 129.99487933734298}
{'epoch': 16, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 77.67269473894783, 'validate_perplexity': 123.70016377075378}
{'epoch': 17, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 76.14901515538004, 'validate_perplexity': 125.0716169630434}
{'epoch': 18, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 73.40351849878758, 'validate_perplexity': 122.91234539380787}
{'epoch': 19, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 73.06321989445152, 'validate_perplexity': 123.42283350097884}
{'epoch': 20, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 71.12681270847911, 'validate_perplexity': 121.76837633676563}
{'epoch': 21, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 70.03938801820972, 'validate_perplexity': 121.89182420364573}
{'epoch': 22, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 69.13658110334151, 'validate_perplexity': 121.27548686897718}
{'epoch': 23, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 68.86375935929846, 'validate_perplexity': 120.76627881219457}
{'epoch': 24, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 68.78693130072809, 'validate_perplexity': 121.03077190239797}
{'epoch': 25, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 67.74174001449708, 'validate_perplexity': 121.81913457495138}
{'epoch': 26, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 67.08920932705477, 'validate_perplexity': 120.60250064044142}
{'epoch': 27, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 67.04392588735946, 'validate_perplexity': 120.48736760063858}
{'epoch': 28, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 66.94811927082694, 'validate_perplexity': 120.84001119821446}
{'epoch': 29, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 66.41996672675906, 'validate_perplexity': 120.70962762821314}
{'epoch': 30, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 66.31483726388406, 'validate_perplexity': 120.55201940076411}
{'epoch': 31, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 65.95747019154108, 'validate_perplexity': 120.26018365664414}
{'epoch': 32, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 65.88174829765421, 'validate_perplexity': 120.2455043544156}
{'epoch': 33, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 65.5738816546629, 'validate_perplexity': 120.72361523568038}
{'epoch': 34, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 65.71169956241677, 'validate_perplexity': 120.47892232709502}
{'epoch': 35, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 65.76915948885463, 'validate_perplexity': 120.55316907989314}
{'epoch': 36, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 65.32483663751843, 'validate_perplexity': 120.33486940880454}
{'epoch': 37, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 65.57951014761666, 'validate_perplexity': 120.46456097637878}
{'epoch': 38, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 65.20278321081403, 'validate_perplexity': 120.39788929554123}
{'epoch': 39, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 65.52549655233128, 'validate_perplexity': 120.46766288317612}
{'epoch': 40, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 65.39471073278936, 'validate_perplexity': 120.37309064886036}
{'epoch': 41, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 65.2716241524104, 'validate_perplexity': 120.37464041462331}
{'epoch': 42, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 65.35272121920508, 'validate_perplexity': 120.27457799360735}
{'epoch': 43, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 65.32888617326023, 'validate_perplexity': 120.33871393940674}
{'epoch': 44, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 65.3362694344273, 'validate_perplexity': 120.35776626161798}
Perplexity on Test: 114.975441
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 818.0773265775283, 'validate_perplexity': 443.03252555909523}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 341.6308259118493, 'validate_perplexity': 266.6787601037536}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 233.35099479844828, 'validate_perplexity': 212.01591014984018}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 188.92654338660265, 'validate_perplexity': 190.3916980983337}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 163.51321085083123, 'validate_perplexity': 178.39343714937306}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 146.6645102940731, 'validate_perplexity': 166.9328074866861}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 133.46125698767244, 'validate_perplexity': 157.36628397358973}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 123.41877273885262, 'validate_perplexity': 152.13965319992997}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 115.40100766144737, 'validate_perplexity': 144.69588144728957}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 109.03011893685868, 'validate_perplexity': 140.64701329763463}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 103.5216736707889, 'validate_perplexity': 142.41386678600642}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 91.72657853883436, 'validate_perplexity': 130.22517656790444}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 88.56607758636213, 'validate_perplexity': 128.576307416218}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 85.07980406229379, 'validate_perplexity': 128.3783062348664}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 82.69551962700811, 'validate_perplexity': 126.90849080398456}
{'epoch': 16, 'learning_rate': 0.006666666666666667, 'train_perplexity': 79.88107294594582, 'validate_perplexity': 126.8196258604469}
{'epoch': 17, 'learning_rate': 0.006666666666666667, 'train_perplexity': 78.1997740346874, 'validate_perplexity': 125.19930912254091}
{'epoch': 18, 'learning_rate': 0.006666666666666667, 'train_perplexity': 75.9194064807668, 'validate_perplexity': 129.28965846796098}
{'epoch': 19, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 70.03638231825846, 'validate_perplexity': 125.21925040450455}
{'epoch': 20, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 68.56009938378615, 'validate_perplexity': 123.44343362755394}
{'epoch': 21, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 67.27411357104708, 'validate_perplexity': 121.42541352019725}
{'epoch': 22, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 65.72827722282257, 'validate_perplexity': 124.49505009965279}
{'epoch': 23, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.973926281600896, 'validate_perplexity': 120.74238304919184}
{'epoch': 24, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.3046206026526, 'validate_perplexity': 121.0774121579209}
{'epoch': 25, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 58.668264231929015, 'validate_perplexity': 120.3596601813416}
{'epoch': 26, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 58.12621838230074, 'validate_perplexity': 121.8709599936807}
{'epoch': 27, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 56.36438364546709, 'validate_perplexity': 121.11523397100953}
{'epoch': 28, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 55.661331557070554, 'validate_perplexity': 121.50922394654408}
{'epoch': 29, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.594245008794715, 'validate_perplexity': 120.68056391558274}
{'epoch': 30, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.43283487782994, 'validate_perplexity': 119.81069920340705}
{'epoch': 31, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.17548544122709, 'validate_perplexity': 120.97157390391126}
{'epoch': 32, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 53.50684141427719, 'validate_perplexity': 121.41441299019145}
{'epoch': 33, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 52.893341129716084, 'validate_perplexity': 121.02505855229018}
{'epoch': 34, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 52.796327391673614, 'validate_perplexity': 121.37065233111781}
{'epoch': 35, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.395593212102014, 'validate_perplexity': 121.23426204375964}
{'epoch': 36, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.41859618465496, 'validate_perplexity': 120.67331346627316}
{'epoch': 37, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 51.965368040397415, 'validate_perplexity': 121.08151135615924}
{'epoch': 38, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 52.012171892033734, 'validate_perplexity': 120.78482287924166}
{'epoch': 39, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 51.99240898263102, 'validate_perplexity': 121.22472393382158}
{'epoch': 40, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.83448929279104, 'validate_perplexity': 121.14064762426663}
{'epoch': 41, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.473714118186045, 'validate_perplexity': 120.95121325678933}
{'epoch': 42, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.59230265574798, 'validate_perplexity': 120.40162101555222}
{'epoch': 43, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.577617873209604, 'validate_perplexity': 121.15578283256272}
{'epoch': 44, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 51.3830282416524, 'validate_perplexity': 121.17658238625853}
{'epoch': 45, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 51.61730345630008, 'validate_perplexity': 121.13787496661382}
{'epoch': 46, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.69778927640568, 'validate_perplexity': 121.03960215581176}
{'epoch': 47, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.62633723545323, 'validate_perplexity': 120.94481160856503}
{'epoch': 48, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.45675660756497, 'validate_perplexity': 120.8691133108146}
{'epoch': 49, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.5168692971523, 'validate_perplexity': 121.00618910033293}
{'epoch': 50, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 51.310665541728156, 'validate_perplexity': 121.09554205682221}
{'epoch': 51, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 51.371268945608385, 'validate_perplexity': 121.04664373457913}
{'epoch': 52, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 50.99110039394112, 'validate_perplexity': 121.04352691967621}
Perplexity on Test: 114.502459
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 847.5845791052808, 'validate_perplexity': 445.42720485030526}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 347.32840626892676, 'validate_perplexity': 269.489900395818}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 236.3711123742243, 'validate_perplexity': 214.4207970959761}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 189.91512519924348, 'validate_perplexity': 189.12619194114367}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 162.12628496784342, 'validate_perplexity': 170.6702469739187}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 145.21476536741426, 'validate_perplexity': 164.20195561973227}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 131.20981407146817, 'validate_perplexity': 154.47796429850467}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 121.36683270478385, 'validate_perplexity': 153.3415270808859}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 113.94114129693597, 'validate_perplexity': 143.14909905672585}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 105.84617150326142, 'validate_perplexity': 142.80807534736337}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 101.91681607713183, 'validate_perplexity': 138.3418825415932}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 96.40400300258935, 'validate_perplexity': 137.16649871997768}
{'epoch': 13, 'learning_rate': 0.01, 'train_perplexity': 91.46360586383466, 'validate_perplexity': 130.52943025735186}
{'epoch': 14, 'learning_rate': 0.01, 'train_perplexity': 88.1574610855482, 'validate_perplexity': 130.0414394515654}
{'epoch': 15, 'learning_rate': 0.01, 'train_perplexity': 85.49045984620223, 'validate_perplexity': 132.39026431417022}
{'epoch': 16, 'learning_rate': 0.006666666666666667, 'train_perplexity': 76.53455426912127, 'validate_perplexity': 124.38984244465269}
{'epoch': 17, 'learning_rate': 0.006666666666666667, 'train_perplexity': 73.84882313279547, 'validate_perplexity': 123.26797185881135}
{'epoch': 18, 'learning_rate': 0.006666666666666667, 'train_perplexity': 72.1047241298523, 'validate_perplexity': 125.01664208017276}
{'epoch': 19, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 66.80627468319605, 'validate_perplexity': 122.94681241952827}
{'epoch': 20, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 65.72987566818969, 'validate_perplexity': 120.15196591474123}
{'epoch': 21, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 64.37634709321875, 'validate_perplexity': 122.28390883761708}
{'epoch': 22, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.1191012458201, 'validate_perplexity': 119.85715505474057}
{'epoch': 23, 'learning_rate': 0.002962962962962963, 'train_perplexity': 59.97168577854879, 'validate_perplexity': 120.39852080925868}
{'epoch': 24, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 57.84582140492276, 'validate_perplexity': 119.40262918645492}
{'epoch': 25, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 57.512616280569645, 'validate_perplexity': 119.84092488768555}
{'epoch': 26, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 55.94476967563756, 'validate_perplexity': 117.94656981992455}
{'epoch': 27, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 55.599153291180876, 'validate_perplexity': 118.41390238373187}
{'epoch': 28, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.85349212937884, 'validate_perplexity': 118.22980312547617}
{'epoch': 29, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.520895510915, 'validate_perplexity': 118.38855269474315}
{'epoch': 30, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 53.564138355832746, 'validate_perplexity': 118.21012941172889}
{'epoch': 31, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 53.201914943009555, 'validate_perplexity': 118.27575879071544}
{'epoch': 32, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 53.055448078607455, 'validate_perplexity': 118.05792389879205}
{'epoch': 33, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 52.805303131436595, 'validate_perplexity': 118.02302652649917}
{'epoch': 34, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.57076204143758, 'validate_perplexity': 117.76729643192961}
{'epoch': 35, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.46163046637053, 'validate_perplexity': 117.67450743007505}
{'epoch': 36, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.44554786598218, 'validate_perplexity': 118.15146594438998}
{'epoch': 37, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 52.28787243691015, 'validate_perplexity': 117.9638371650705}
{'epoch': 38, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 52.332458816627486, 'validate_perplexity': 117.62637354348523}
{'epoch': 39, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 51.955469765361116, 'validate_perplexity': 118.37263428381554}
{'epoch': 40, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 52.38743651694176, 'validate_perplexity': 118.29628954513336}
{'epoch': 41, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.732411467935236, 'validate_perplexity': 118.31507492268402}
{'epoch': 42, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 51.72043657114696, 'validate_perplexity': 117.88443954531414}
{'epoch': 43, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 51.89322449508363, 'validate_perplexity': 118.19761660398618}
{'epoch': 44, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 51.81454682531249, 'validate_perplexity': 118.07470082341383}
{'epoch': 45, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 52.055790921057834, 'validate_perplexity': 117.9906149853576}
{'epoch': 46, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 52.0074350525217, 'validate_perplexity': 117.89528889899896}
{'epoch': 47, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 51.6145837861146, 'validate_perplexity': 118.13338250642158}
{'epoch': 48, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.80729579850079, 'validate_perplexity': 118.1877538377672}
{'epoch': 49, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 51.96346009100576, 'validate_perplexity': 118.0649046113791}
{'epoch': 50, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 51.60814821435272, 'validate_perplexity': 118.21029851279637}
{'epoch': 51, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 51.833092823105204, 'validate_perplexity': 117.99444088443352}
{'epoch': 52, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 51.90825902782117, 'validate_perplexity': 117.93734660618655}
{'epoch': 53, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 51.90787537654476, 'validate_perplexity': 117.934534794246}
{'epoch': 54, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 51.74469755879874, 'validate_perplexity': 118.03580227048066}
Perplexity on Test: 112.454262
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 772.9185420781017, 'validate_perplexity': 413.13922283525426}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 326.4116681378502, 'validate_perplexity': 263.23298951721966}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 228.06621457318863, 'validate_perplexity': 218.34709752756672}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 184.8406944085656, 'validate_perplexity': 185.74198971454504}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 159.30518452978026, 'validate_perplexity': 170.79953039747502}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 139.9909687187835, 'validate_perplexity': 159.05098869715604}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 127.32757978750134, 'validate_perplexity': 150.93021380175284}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 116.80659169506613, 'validate_perplexity': 143.51078082307944}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 108.76866522451623, 'validate_perplexity': 139.8849385418195}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 102.8809586342226, 'validate_perplexity': 136.14105545273657}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 97.09017652789262, 'validate_perplexity': 132.87889537424007}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 91.94706717340523, 'validate_perplexity': 132.2790784405425}
{'epoch': 13, 'learning_rate': 0.01, 'train_perplexity': 88.3536965489171, 'validate_perplexity': 135.26561863189553}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 79.0470902959485, 'validate_perplexity': 124.11280056183008}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 76.44231525732154, 'validate_perplexity': 125.26852017800346}
{'epoch': 16, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 70.69532592854672, 'validate_perplexity': 119.65483524678153}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 68.97523190789033, 'validate_perplexity': 121.19027737491511}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 65.38825624733889, 'validate_perplexity': 119.42779737840068}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 64.29869976387279, 'validate_perplexity': 118.49958948497525}
{'epoch': 20, 'learning_rate': 0.002962962962962963, 'train_perplexity': 63.37861677390014, 'validate_perplexity': 118.44789868043843}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 61.2632708197606, 'validate_perplexity': 118.18211834086155}
{'epoch': 22, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 60.183302289691554, 'validate_perplexity': 117.70172469235995}
{'epoch': 23, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 59.41663410268229, 'validate_perplexity': 120.03966700702271}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 57.88055887469036, 'validate_perplexity': 117.5352087843953}
{'epoch': 25, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 57.769824936603854, 'validate_perplexity': 118.34249677368234}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 56.638085752349056, 'validate_perplexity': 117.35113265489844}
{'epoch': 27, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 56.32102144882015, 'validate_perplexity': 116.47838140844917}
{'epoch': 28, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 55.95546799296706, 'validate_perplexity': 117.64662328018615}
{'epoch': 29, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 55.46894775465691, 'validate_perplexity': 117.28389115526012}
{'epoch': 30, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 54.797023858696406, 'validate_perplexity': 117.4628767553266}
{'epoch': 31, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 54.80909690528248, 'validate_perplexity': 116.6470710467571}
{'epoch': 32, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 54.52279336793427, 'validate_perplexity': 116.48537981245254}
{'epoch': 33, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 54.544350379774485, 'validate_perplexity': 116.68890603365465}
{'epoch': 34, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 54.030846797871035, 'validate_perplexity': 116.49082330627213}
{'epoch': 35, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 54.122553902889116, 'validate_perplexity': 116.45272419086272}
{'epoch': 36, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 53.791721573131106, 'validate_perplexity': 116.70226078332824}
{'epoch': 37, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 53.69939715887579, 'validate_perplexity': 116.75853444861534}
{'epoch': 38, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 53.54682410490676, 'validate_perplexity': 116.61042213272026}
{'epoch': 39, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 53.580129640982314, 'validate_perplexity': 116.55583160781089}
{'epoch': 40, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 53.36720693577815, 'validate_perplexity': 116.45583385560086}
{'epoch': 41, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 53.482685063222874, 'validate_perplexity': 116.53243555446193}
{'epoch': 42, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 53.544462343814835, 'validate_perplexity': 116.46788459080217}
{'epoch': 43, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 53.22802568908643, 'validate_perplexity': 116.58123359068435}
{'epoch': 44, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 53.10444918737565, 'validate_perplexity': 116.66659588255848}
{'epoch': 45, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 53.423488218628215, 'validate_perplexity': 116.65852967105073}
{'epoch': 46, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 53.245046437762, 'validate_perplexity': 116.56061142684598}
{'epoch': 47, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 53.5058336187851, 'validate_perplexity': 116.60330501458444}
Perplexity on Test: 111.778940
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 798.8450369759188, 'validate_perplexity': 425.89731179008487}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 327.0167542897554, 'validate_perplexity': 260.63646031706355}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 223.14018307222767, 'validate_perplexity': 209.60191217517118}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 183.78604771723315, 'validate_perplexity': 191.12247037448506}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 159.69253236476266, 'validate_perplexity': 173.20843850378148}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 143.76145796214385, 'validate_perplexity': 164.02947837108002}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 130.45214947070687, 'validate_perplexity': 150.54689000638888}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 122.09507366385532, 'validate_perplexity': 148.2145023108093}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 114.20059688765687, 'validate_perplexity': 143.49777947019268}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 107.88100810603724, 'validate_perplexity': 142.37020849157452}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 101.92551544115494, 'validate_perplexity': 142.2648183776182}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 97.54982504925427, 'validate_perplexity': 135.74000514692196}
{'epoch': 13, 'learning_rate': 0.01, 'train_perplexity': 93.83538777200764, 'validate_perplexity': 136.3168345445813}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 83.69596602038446, 'validate_perplexity': 127.62488106103774}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 80.61011345941436, 'validate_perplexity': 129.31826728531422}
{'epoch': 16, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 74.3287208033654, 'validate_perplexity': 122.76497313388701}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 72.39532210434803, 'validate_perplexity': 122.47338646706268}
{'epoch': 18, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 70.66408351730932, 'validate_perplexity': 126.47763312315345}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 66.25013988222709, 'validate_perplexity': 120.4110943627433}
{'epoch': 20, 'learning_rate': 0.002962962962962963, 'train_perplexity': 65.36537446195544, 'validate_perplexity': 120.86738427664648}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 62.79242241370281, 'validate_perplexity': 119.91723722356052}
{'epoch': 22, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 62.20865374101424, 'validate_perplexity': 120.99395726616306}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 60.454213231913094, 'validate_perplexity': 118.86438110470937}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 59.93289257063095, 'validate_perplexity': 119.64428037608987}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 58.49473936435178, 'validate_perplexity': 119.1720919998311}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 58.247522372261685, 'validate_perplexity': 119.8128130392074}
{'epoch': 27, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 57.79743342437078, 'validate_perplexity': 118.11620298785336}
{'epoch': 28, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 57.04945992795209, 'validate_perplexity': 118.7460943444379}
{'epoch': 29, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 56.90888352482372, 'validate_perplexity': 118.24525125818398}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 56.85626350264736, 'validate_perplexity': 118.70697460748957}
{'epoch': 31, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.598075303964364, 'validate_perplexity': 118.52852358066474}
{'epoch': 32, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 55.788078081592516, 'validate_perplexity': 118.30700756328842}
{'epoch': 33, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.09080060612432, 'validate_perplexity': 118.03934820670248}
{'epoch': 34, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 56.24952260442743, 'validate_perplexity': 119.17351265034097}
{'epoch': 35, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 55.635592360704344, 'validate_perplexity': 118.44614780223769}
{'epoch': 36, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 55.74800352625506, 'validate_perplexity': 118.32415841223275}
{'epoch': 37, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 55.78783866564872, 'validate_perplexity': 118.5388104494197}
{'epoch': 38, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 55.589318304767275, 'validate_perplexity': 118.0302303002038}
{'epoch': 39, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 55.68458667232942, 'validate_perplexity': 118.00327468411207}
{'epoch': 40, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 55.11065908552106, 'validate_perplexity': 117.86471089626728}
{'epoch': 41, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 55.2735915767152, 'validate_perplexity': 118.16414290150595}
{'epoch': 42, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 55.2295147317727, 'validate_perplexity': 118.34560046987309}
{'epoch': 43, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 54.86513286594437, 'validate_perplexity': 118.36461944034784}
{'epoch': 44, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 55.22277325912538, 'validate_perplexity': 118.27231854586994}
{'epoch': 45, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 54.984901010042826, 'validate_perplexity': 118.10122223418725}
{'epoch': 46, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 55.156034981788274, 'validate_perplexity': 118.0421062307434}
{'epoch': 47, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 55.427331616309274, 'validate_perplexity': 118.15586046883506}
{'epoch': 48, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 55.17392218553327, 'validate_perplexity': 118.09125489075522}
{'epoch': 49, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 55.065530577130914, 'validate_perplexity': 118.06203346187704}
{'epoch': 50, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 54.935291132075754, 'validate_perplexity': 118.20753655898521}
Perplexity on Test: 112.119193
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 829.5938953735649, 'validate_perplexity': 438.1034887854726}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 346.1766176634946, 'validate_perplexity': 275.7482300090942}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 233.71803047120923, 'validate_perplexity': 223.58975621646377}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 186.26821171776675, 'validate_perplexity': 192.17852522825595}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 160.46186858743027, 'validate_perplexity': 171.90535195460242}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 141.12042038419523, 'validate_perplexity': 159.73708476840605}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 127.37682878632906, 'validate_perplexity': 154.7127522974371}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 117.55381724875078, 'validate_perplexity': 143.55287222736507}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 109.26025955042124, 'validate_perplexity': 141.42477092259176}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 102.50664596842556, 'validate_perplexity': 139.97354729711597}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 96.27996653649292, 'validate_perplexity': 134.47683894110958}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 92.31692985086276, 'validate_perplexity': 133.57738554041117}
{'epoch': 13, 'learning_rate': 0.01, 'train_perplexity': 87.62763101011447, 'validate_perplexity': 129.54891017294995}
{'epoch': 14, 'learning_rate': 0.01, 'train_perplexity': 83.93264318161233, 'validate_perplexity': 132.34065453391136}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 75.35762549913058, 'validate_perplexity': 124.46744896211258}
{'epoch': 16, 'learning_rate': 0.006666666666666667, 'train_perplexity': 72.97370304449235, 'validate_perplexity': 126.80414589392839}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 67.54546857430034, 'validate_perplexity': 121.0984869781918}
{'epoch': 18, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 65.71624311760041, 'validate_perplexity': 122.21634678463913}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 62.635872772965854, 'validate_perplexity': 119.98049608292963}
{'epoch': 20, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.44102397586407, 'validate_perplexity': 120.38462827261537}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 59.56175785894955, 'validate_perplexity': 118.24761939853987}
{'epoch': 22, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 58.615386956135545, 'validate_perplexity': 119.59511265865868}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 57.14052758238118, 'validate_perplexity': 119.516497732595}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 56.62641986779351, 'validate_perplexity': 117.64308914153492}
{'epoch': 25, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 56.62563682735483, 'validate_perplexity': 118.93621503619029}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 55.522772451484364, 'validate_perplexity': 118.29059246760185}
{'epoch': 27, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 55.14065136603342, 'validate_perplexity': 118.04925487873166}
{'epoch': 28, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.57890096860833, 'validate_perplexity': 118.0108711553448}
{'epoch': 29, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 54.23479143227409, 'validate_perplexity': 118.4125472519171}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 53.419450693973445, 'validate_perplexity': 117.99016488773974}
{'epoch': 31, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 53.548317812993204, 'validate_perplexity': 117.52131040776554}
{'epoch': 32, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 53.45066347352705, 'validate_perplexity': 118.37562587855639}
{'epoch': 33, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 53.06592281944863, 'validate_perplexity': 117.63142162811671}
{'epoch': 34, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.91242464415376, 'validate_perplexity': 117.43582671954549}
{'epoch': 35, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 52.82485865714283, 'validate_perplexity': 117.46478113335166}
{'epoch': 36, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 52.871264305834465, 'validate_perplexity': 117.683373393332}
{'epoch': 37, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 52.47117480676677, 'validate_perplexity': 117.55594732111878}
{'epoch': 38, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 52.489492794185864, 'validate_perplexity': 117.56732704577851}
{'epoch': 39, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 52.540839735107134, 'validate_perplexity': 117.65711412654967}
{'epoch': 40, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 52.664197063011166, 'validate_perplexity': 117.48220202407109}
{'epoch': 41, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 52.67310010670035, 'validate_perplexity': 117.53235051624789}
{'epoch': 42, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 52.325447195260665, 'validate_perplexity': 117.61195964989167}
{'epoch': 43, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 52.37202595350191, 'validate_perplexity': 117.80605032477948}
{'epoch': 44, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 52.6718819709547, 'validate_perplexity': 117.64925992896276}
{'epoch': 45, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 52.36577060863238, 'validate_perplexity': 117.57988526175589}
{'epoch': 46, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 52.35755614100439, 'validate_perplexity': 117.65868502898694}
{'epoch': 47, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 52.32880317438614, 'validate_perplexity': 117.63731133400258}
Perplexity on Test: 112.035555
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 814.2377406173853, 'validate_perplexity': 433.6825573866593}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 332.33730643785464, 'validate_perplexity': 264.85667316770594}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 225.88786421666282, 'validate_perplexity': 211.02918085693614}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 184.2193042452299, 'validate_perplexity': 182.95948872530607}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 158.77932946283968, 'validate_perplexity': 173.4696266481328}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 142.07005567910977, 'validate_perplexity': 155.69883008564008}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 130.34190761993008, 'validate_perplexity': 156.67072184616117}
{'epoch': 8, 'learning_rate': 0.006666666666666667, 'train_perplexity': 112.85967890761472, 'validate_perplexity': 142.33491141687657}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 106.58730195188589, 'validate_perplexity': 141.7412672552812}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 100.86561452059061, 'validate_perplexity': 133.46972128117568}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 96.65218682572399, 'validate_perplexity': 137.72633686121569}
{'epoch': 12, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 87.6994455234734, 'validate_perplexity': 127.49039871752059}
{'epoch': 13, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 85.04913926552106, 'validate_perplexity': 127.04756918539847}
{'epoch': 14, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 82.26835254253787, 'validate_perplexity': 127.09264944679089}
{'epoch': 15, 'learning_rate': 0.002962962962962963, 'train_perplexity': 77.31547980404069, 'validate_perplexity': 124.420689392297}
{'epoch': 16, 'learning_rate': 0.002962962962962963, 'train_perplexity': 75.41186838578263, 'validate_perplexity': 122.1175484467677}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 74.17565984016541, 'validate_perplexity': 124.82751533869464}
{'epoch': 18, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 70.64420610967983, 'validate_perplexity': 121.31591583858041}
{'epoch': 19, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 69.90292583467667, 'validate_perplexity': 121.78660969931457}
{'epoch': 20, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 67.42858966971663, 'validate_perplexity': 122.02383382661743}
{'epoch': 21, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 65.92816439744178, 'validate_perplexity': 121.80170945433919}
{'epoch': 22, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 65.41716607946421, 'validate_perplexity': 119.04248632172774}
{'epoch': 23, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 65.12044380345722, 'validate_perplexity': 120.34835449657085}
{'epoch': 24, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 63.890414483040395, 'validate_perplexity': 120.70508056582923}
{'epoch': 25, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 63.34432490204454, 'validate_perplexity': 119.91169079535366}
{'epoch': 26, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 62.91407370756985, 'validate_perplexity': 120.25009144387349}
{'epoch': 27, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.61049087226408, 'validate_perplexity': 119.26191040736276}
{'epoch': 28, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.058560328533204, 'validate_perplexity': 119.49649594908422}
{'epoch': 29, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.9688436370396, 'validate_perplexity': 118.97308433401722}
{'epoch': 30, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.86875328542139, 'validate_perplexity': 119.4065578082003}
{'epoch': 31, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.68552692425581, 'validate_perplexity': 119.38139409227763}
{'epoch': 32, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.68032087456379, 'validate_perplexity': 119.3200441889799}
{'epoch': 33, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.44711812963475, 'validate_perplexity': 119.4228430389648}
{'epoch': 34, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 61.40081186091498, 'validate_perplexity': 119.14834123194181}
{'epoch': 35, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 61.65735479399742, 'validate_perplexity': 119.2484333319276}
{'epoch': 36, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 61.47537008193708, 'validate_perplexity': 119.34183743534642}
{'epoch': 37, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 61.28907098021885, 'validate_perplexity': 119.31355819495919}
{'epoch': 38, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 61.49727134664653, 'validate_perplexity': 119.24979802975413}
{'epoch': 39, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 61.28816501481535, 'validate_perplexity': 119.3033178695881}
{'epoch': 40, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 61.163853444771334, 'validate_perplexity': 119.22080156092596}
{'epoch': 41, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 61.39378549770098, 'validate_perplexity': 119.33125327291026}
Perplexity on Test: 113.159291
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 832.108452437421, 'validate_perplexity': 487.9955010623029}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 378.39381703470536, 'validate_perplexity': 305.4992101411892}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 263.29525443835666, 'validate_perplexity': 248.67982114279934}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 211.34860980069973, 'validate_perplexity': 212.7396834598847}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 180.58469533463833, 'validate_perplexity': 192.33162140605964}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 158.38649298626063, 'validate_perplexity': 173.77031423540834}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 143.31041719338384, 'validate_perplexity': 170.13591767818343}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 130.81674054962525, 'validate_perplexity': 159.12571004365316}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 120.91701739024793, 'validate_perplexity': 153.584547443856}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 113.47825717510634, 'validate_perplexity': 147.4927736585184}
{'epoch': 11, 'learning_rate': 0.01, 'train_perplexity': 107.20390605885936, 'validate_perplexity': 141.08186764456678}
{'epoch': 12, 'learning_rate': 0.01, 'train_perplexity': 101.57313059045511, 'validate_perplexity': 141.48837792267233}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 90.03327198833445, 'validate_perplexity': 134.74662048799624}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 86.5700326573004, 'validate_perplexity': 133.45292050151411}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 83.46242369098816, 'validate_perplexity': 131.7629942093799}
{'epoch': 16, 'learning_rate': 0.006666666666666667, 'train_perplexity': 80.80287900403839, 'validate_perplexity': 129.3207955239862}
{'epoch': 17, 'learning_rate': 0.006666666666666667, 'train_perplexity': 78.30779895416181, 'validate_perplexity': 129.22186112541422}
{'epoch': 18, 'learning_rate': 0.006666666666666667, 'train_perplexity': 75.58279599624016, 'validate_perplexity': 130.24057733687422}
{'epoch': 19, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 70.0690177622804, 'validate_perplexity': 127.14999234207315}
{'epoch': 20, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 68.53999674970959, 'validate_perplexity': 126.48860988025015}
{'epoch': 21, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 66.95629214749027, 'validate_perplexity': 127.74543340657253}
{'epoch': 22, 'learning_rate': 0.002962962962962963, 'train_perplexity': 62.965814572126305, 'validate_perplexity': 124.18863534383573}
{'epoch': 23, 'learning_rate': 0.002962962962962963, 'train_perplexity': 62.3704997649433, 'validate_perplexity': 122.35244171086015}
{'epoch': 24, 'learning_rate': 0.002962962962962963, 'train_perplexity': 61.384096290913, 'validate_perplexity': 124.66072606336473}
{'epoch': 25, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 59.00625141446982, 'validate_perplexity': 122.44874420489123}
{'epoch': 26, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 58.141630933188985, 'validate_perplexity': 121.56485918798923}
{'epoch': 27, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 57.40483079238991, 'validate_perplexity': 121.88868562629652}
{'epoch': 28, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 56.05585424240277, 'validate_perplexity': 122.25143475238652}
{'epoch': 29, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.72495454355854, 'validate_perplexity': 122.15103539354797}
{'epoch': 30, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.29385167288477, 'validate_perplexity': 121.85172624024368}
{'epoch': 31, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 54.14617305009219, 'validate_perplexity': 122.96809535736496}
{'epoch': 32, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 53.46776816289053, 'validate_perplexity': 122.2604123621255}
{'epoch': 33, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 53.40758188404062, 'validate_perplexity': 122.11323948911814}
{'epoch': 34, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 53.055422779804395, 'validate_perplexity': 122.5941000314962}
{'epoch': 35, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 52.214148303401835, 'validate_perplexity': 121.96508062255054}
{'epoch': 36, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 52.299367721352766, 'validate_perplexity': 122.65455990976017}
{'epoch': 37, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 51.98265428023608, 'validate_perplexity': 122.08133464248264}
{'epoch': 38, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 51.99809903789809, 'validate_perplexity': 122.30816800043193}
{'epoch': 39, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 51.63971850228065, 'validate_perplexity': 122.09484078620558}
{'epoch': 40, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 51.84853022895789, 'validate_perplexity': 122.18237584348309}
{'epoch': 41, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.492666021665215, 'validate_perplexity': 121.53871904331689}
{'epoch': 42, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 51.66357209876983, 'validate_perplexity': 122.52776880840355}
{'epoch': 43, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 51.26081413733247, 'validate_perplexity': 122.66766152985022}
{'epoch': 44, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 51.297039325966175, 'validate_perplexity': 122.39964964797053}
{'epoch': 45, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 51.55722108084619, 'validate_perplexity': 122.60924143883341}
{'epoch': 46, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.31922964753281, 'validate_perplexity': 122.17835589393046}
{'epoch': 47, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 51.27144797166053, 'validate_perplexity': 122.55166728811456}
{'epoch': 48, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 51.32198269938271, 'validate_perplexity': 122.39929946026487}
{'epoch': 49, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 51.18702481067846, 'validate_perplexity': 122.44261360668831}
{'epoch': 50, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 51.31855670176527, 'validate_perplexity': 122.27871940120285}
{'epoch': 51, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 51.258039924988005, 'validate_perplexity': 122.36988726946562}
{'epoch': 52, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 51.38876187442251, 'validate_perplexity': 122.44249683636816}
Perplexity on Test: 118.103869
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 724.4771138872437, 'validate_perplexity': 384.45694943820143}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 302.5674159530487, 'validate_perplexity': 244.26724253599173}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 214.524906554361, 'validate_perplexity': 202.6766095500043}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 175.52899469204525, 'validate_perplexity': 179.19398481654096}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 153.5293383156268, 'validate_perplexity': 167.23276978329127}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 138.73824857817672, 'validate_perplexity': 158.5217396689078}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 126.61672233332943, 'validate_perplexity': 149.59486732325007}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 117.35476994100537, 'validate_perplexity': 145.60540941211391}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 108.7088813831632, 'validate_perplexity': 141.68842368081835}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 102.88679663806562, 'validate_perplexity': 143.11470075025872}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 92.44683656658505, 'validate_perplexity': 131.32548647003688}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 88.24645574505453, 'validate_perplexity': 130.7821875450453}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 85.20428023508978, 'validate_perplexity': 131.6529001303884}
{'epoch': 14, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 79.42192439541321, 'validate_perplexity': 124.86680630351829}
{'epoch': 15, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 76.85114030397659, 'validate_perplexity': 124.91295920899894}
{'epoch': 16, 'learning_rate': 0.002962962962962963, 'train_perplexity': 73.02020607717431, 'validate_perplexity': 122.47770813306461}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 71.91998489549975, 'validate_perplexity': 120.77411074163217}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 70.67503532565036, 'validate_perplexity': 120.7208521230322}
{'epoch': 19, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 68.13726262272024, 'validate_perplexity': 119.70014618585206}
{'epoch': 20, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 67.15299695355525, 'validate_perplexity': 117.75471819934864}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 66.82621933685859, 'validate_perplexity': 120.19448478427435}
{'epoch': 22, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 65.345928124205, 'validate_perplexity': 117.31868186173892}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 64.75049118816665, 'validate_perplexity': 116.63333330628608}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 64.21912500051248, 'validate_perplexity': 117.42059631493585}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 62.92694492453574, 'validate_perplexity': 116.15718587419815}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 62.380255435400734, 'validate_perplexity': 117.03924969365939}
{'epoch': 27, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 62.396855478573386, 'validate_perplexity': 115.85847508048495}
{'epoch': 28, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 61.90144938502906, 'validate_perplexity': 117.30419380252343}
{'epoch': 29, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 60.97387534589812, 'validate_perplexity': 116.01641939521744}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 61.21225765145762, 'validate_perplexity': 116.08049873337794}
{'epoch': 31, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 60.65651162258658, 'validate_perplexity': 116.3106567152753}
{'epoch': 32, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 60.58274430792509, 'validate_perplexity': 116.0706465853596}
{'epoch': 33, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 60.440118562575904, 'validate_perplexity': 116.52387855135576}
{'epoch': 34, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 60.29940998533317, 'validate_perplexity': 115.88803526075804}
{'epoch': 35, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 59.999974612626595, 'validate_perplexity': 115.871292774693}
{'epoch': 36, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 60.09793003996547, 'validate_perplexity': 115.8365998779696}
{'epoch': 37, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 59.76163974815468, 'validate_perplexity': 115.93921706163913}
{'epoch': 38, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.39814226830901, 'validate_perplexity': 115.78038409459907}
{'epoch': 39, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 59.89769453084575, 'validate_perplexity': 115.71933971426846}
{'epoch': 40, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.27287680630936, 'validate_perplexity': 115.79175758139138}
{'epoch': 41, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 59.98804533803072, 'validate_perplexity': 115.76398836398316}
{'epoch': 42, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 60.24871102004218, 'validate_perplexity': 115.81826324466977}
{'epoch': 43, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 59.74237917093142, 'validate_perplexity': 115.73738474599367}
{'epoch': 44, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 60.04814493465253, 'validate_perplexity': 115.76768686133546}
Perplexity on Test: 111.431058
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 873.6641361504818, 'validate_perplexity': 475.35438172344686}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 385.5926619287691, 'validate_perplexity': 303.9554896851355}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 262.5251356423272, 'validate_perplexity': 247.9377909373604}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 210.75505107179296, 'validate_perplexity': 212.81273437317813}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 181.71296757583303, 'validate_perplexity': 193.75018332186963}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 160.84389886162697, 'validate_perplexity': 173.5786816081992}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 146.3462318821331, 'validate_perplexity': 168.7686887566153}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 134.19377528978703, 'validate_perplexity': 162.45114830867476}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 125.05247437510441, 'validate_perplexity': 158.0821197243234}
{'epoch': 10, 'learning_rate': 0.01, 'train_perplexity': 116.55710991230548, 'validate_perplexity': 166.7467274239986}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 103.40050915268739, 'validate_perplexity': 144.76717225702362}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 99.9693504637452, 'validate_perplexity': 138.07774109778546}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 95.5803295475634, 'validate_perplexity': 136.6087408213409}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 91.68980171633237, 'validate_perplexity': 132.82074215898413}
{'epoch': 15, 'learning_rate': 0.006666666666666667, 'train_perplexity': 88.52980807331424, 'validate_perplexity': 134.55887798054908}
{'epoch': 16, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 82.47243710701551, 'validate_perplexity': 131.80893057333097}
{'epoch': 17, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 79.98737736312603, 'validate_perplexity': 128.74563430731786}
{'epoch': 18, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 77.79475413281746, 'validate_perplexity': 129.05529246382022}
{'epoch': 19, 'learning_rate': 0.002962962962962963, 'train_perplexity': 74.72342713303287, 'validate_perplexity': 126.82089578531513}
{'epoch': 20, 'learning_rate': 0.002962962962962963, 'train_perplexity': 73.01181524115027, 'validate_perplexity': 125.5110902354314}
{'epoch': 21, 'learning_rate': 0.002962962962962963, 'train_perplexity': 71.66344338907362, 'validate_perplexity': 126.66237448327368}
{'epoch': 22, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 69.5418725761166, 'validate_perplexity': 125.71875607910782}
{'epoch': 23, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 68.40632545048254, 'validate_perplexity': 124.93589312315436}
{'epoch': 24, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 67.69934105304063, 'validate_perplexity': 124.0085663854702}
{'epoch': 25, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 66.75268302323146, 'validate_perplexity': 125.68159420051292}
{'epoch': 26, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 65.23932550095964, 'validate_perplexity': 123.34093787368471}
{'epoch': 27, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 64.59213974820815, 'validate_perplexity': 123.32288343771455}
{'epoch': 28, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 63.75368030770901, 'validate_perplexity': 123.06828602455832}
{'epoch': 29, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 63.12439284720123, 'validate_perplexity': 123.7143799327122}
{'epoch': 30, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 62.65998017006375, 'validate_perplexity': 122.51877156662023}
{'epoch': 31, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 62.41542419985461, 'validate_perplexity': 122.73728735605128}
{'epoch': 32, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 61.85447629385126, 'validate_perplexity': 122.27428814667967}
{'epoch': 33, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 61.63557285867749, 'validate_perplexity': 123.06124420221565}
{'epoch': 34, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 61.08865350024248, 'validate_perplexity': 122.30081976519453}
{'epoch': 35, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 61.201225476275035, 'validate_perplexity': 122.53542282635117}
{'epoch': 36, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 60.59739234702403, 'validate_perplexity': 122.55090760701523}
{'epoch': 37, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 60.80869403992451, 'validate_perplexity': 122.5624786448899}
{'epoch': 38, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.0073784970511, 'validate_perplexity': 122.48308122777057}
{'epoch': 39, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 60.38639297921943, 'validate_perplexity': 122.37286318162468}
{'epoch': 40, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 60.69348683783873, 'validate_perplexity': 122.29627107890865}
{'epoch': 41, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 60.604385361877746, 'validate_perplexity': 122.47379526677585}
{'epoch': 42, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 60.34678477565794, 'validate_perplexity': 122.40373524523314}
{'epoch': 43, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 60.56538504516233, 'validate_perplexity': 122.41033085429308}
{'epoch': 44, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.39569431124441, 'validate_perplexity': 122.19257196084628}
{'epoch': 45, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.47488559209782, 'validate_perplexity': 122.51888840968167}
{'epoch': 46, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 59.94158098231887, 'validate_perplexity': 122.38914445261248}
{'epoch': 47, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 60.255376475924855, 'validate_perplexity': 122.24286583077972}
{'epoch': 48, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 60.182957919058786, 'validate_perplexity': 122.384183979334}
{'epoch': 49, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 60.08351733460611, 'validate_perplexity': 122.3692454155467}
{'epoch': 50, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 60.31807356961376, 'validate_perplexity': 122.38325026567188}
Perplexity on Test: 118.989083
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 822.2999853824579, 'validate_perplexity': 435.43143209163503}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 341.1047312644295, 'validate_perplexity': 266.78483458765106}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 228.53519301924302, 'validate_perplexity': 209.5797253419226}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 185.2584153084861, 'validate_perplexity': 189.03828454150616}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 159.20145336199235, 'validate_perplexity': 168.56295873625837}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 140.60517051241862, 'validate_perplexity': 156.2958459943269}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 127.45271314961556, 'validate_perplexity': 159.62317720909263}
{'epoch': 8, 'learning_rate': 0.006666666666666667, 'train_perplexity': 110.9868383761067, 'validate_perplexity': 140.88670900863656}
{'epoch': 9, 'learning_rate': 0.006666666666666667, 'train_perplexity': 104.55981422700906, 'validate_perplexity': 137.39587320676137}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 99.74955187951825, 'validate_perplexity': 133.29799487933585}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 95.7015480292324, 'validate_perplexity': 133.83700216443825}
{'epoch': 12, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 87.4473928380833, 'validate_perplexity': 128.85090098253016}
{'epoch': 13, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 84.7314616244132, 'validate_perplexity': 125.97282965413203}
{'epoch': 14, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 82.43984235997335, 'validate_perplexity': 124.92070265726407}
{'epoch': 15, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 79.84257297005145, 'validate_perplexity': 125.92118131238202}
{'epoch': 16, 'learning_rate': 0.002962962962962963, 'train_perplexity': 75.5776423539015, 'validate_perplexity': 121.24403215858632}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 74.2981400492985, 'validate_perplexity': 120.24080276931791}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 72.82506448277614, 'validate_perplexity': 122.43531567569637}
{'epoch': 19, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 70.27707883494757, 'validate_perplexity': 119.62574029016359}
{'epoch': 20, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 69.13855914508918, 'validate_perplexity': 120.08632616238438}
{'epoch': 21, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 67.19582273963222, 'validate_perplexity': 118.4278498498035}
{'epoch': 22, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 66.52259828135765, 'validate_perplexity': 118.24000768759912}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 66.11913729153365, 'validate_perplexity': 118.86211396818264}
{'epoch': 24, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 64.8086248475331, 'validate_perplexity': 117.66822310118205}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 64.25006073504315, 'validate_perplexity': 117.88893656557937}
{'epoch': 26, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 63.45593971490867, 'validate_perplexity': 118.0421062307434}
{'epoch': 27, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 62.63476769958601, 'validate_perplexity': 118.03197503028657}
{'epoch': 28, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 62.1142771445868, 'validate_perplexity': 117.09021409611182}
{'epoch': 29, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 61.93814978091282, 'validate_perplexity': 117.35309117956284}
{'epoch': 30, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.41237783462273, 'validate_perplexity': 117.0970817521952}
{'epoch': 31, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 61.92657337363876, 'validate_perplexity': 117.42709139296718}
{'epoch': 32, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.61300537024032, 'validate_perplexity': 117.28098307481118}
{'epoch': 33, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.66211786185333, 'validate_perplexity': 117.04689573026324}
{'epoch': 34, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 61.277761984918826, 'validate_perplexity': 117.42479567778545}
{'epoch': 35, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.382896223704485, 'validate_perplexity': 117.1907006940937}
{'epoch': 36, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 61.20805468358277, 'validate_perplexity': 117.38706279260433}
{'epoch': 37, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 61.463147467255574, 'validate_perplexity': 117.23138903704894}
{'epoch': 38, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 61.22732065644918, 'validate_perplexity': 117.25112349757667}
{'epoch': 39, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 60.97532909370031, 'validate_perplexity': 117.22266891732848}
{'epoch': 40, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 61.11053355197347, 'validate_perplexity': 117.13421873929047}
{'epoch': 41, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 61.5849250929206, 'validate_perplexity': 117.1147273333288}
{'epoch': 42, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 61.1061044669642, 'validate_perplexity': 117.19366241819485}
{'epoch': 43, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 60.77634328555545, 'validate_perplexity': 117.16907678852358}
Perplexity on Test: 111.993039
{'epoch': 1, 'learning_rate': 0.01, 'train_perplexity': 864.7580366314617, 'validate_perplexity': 458.6779594999578}
{'epoch': 2, 'learning_rate': 0.01, 'train_perplexity': 340.61160368009865, 'validate_perplexity': 262.50673457152027}
{'epoch': 3, 'learning_rate': 0.01, 'train_perplexity': 229.06127056996425, 'validate_perplexity': 210.59482114716332}
{'epoch': 4, 'learning_rate': 0.01, 'train_perplexity': 183.34769015289677, 'validate_perplexity': 185.32715509713017}
{'epoch': 5, 'learning_rate': 0.01, 'train_perplexity': 154.24801916482036, 'validate_perplexity': 171.17180682496834}
{'epoch': 6, 'learning_rate': 0.01, 'train_perplexity': 136.10847096157727, 'validate_perplexity': 155.45476215243573}
{'epoch': 7, 'learning_rate': 0.01, 'train_perplexity': 122.44874420489123, 'validate_perplexity': 153.6638079631641}
{'epoch': 8, 'learning_rate': 0.01, 'train_perplexity': 111.98695133284703, 'validate_perplexity': 143.68202952931608}
{'epoch': 9, 'learning_rate': 0.01, 'train_perplexity': 104.18048625770822, 'validate_perplexity': 143.89354745816016}
{'epoch': 10, 'learning_rate': 0.006666666666666667, 'train_perplexity': 91.73738261940498, 'validate_perplexity': 132.99997114746813}
{'epoch': 11, 'learning_rate': 0.006666666666666667, 'train_perplexity': 87.63147523260567, 'validate_perplexity': 131.71606893689386}
{'epoch': 12, 'learning_rate': 0.006666666666666667, 'train_perplexity': 82.94843000955723, 'validate_perplexity': 130.2547998255901}
{'epoch': 13, 'learning_rate': 0.006666666666666667, 'train_perplexity': 80.02236026543056, 'validate_perplexity': 126.66219329142318}
{'epoch': 14, 'learning_rate': 0.006666666666666667, 'train_perplexity': 77.29999725910606, 'validate_perplexity': 128.76104429578132}
{'epoch': 15, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 71.78485598605756, 'validate_perplexity': 122.4301198139383}
{'epoch': 16, 'learning_rate': 0.0044444444444444444, 'train_perplexity': 69.58516003732633, 'validate_perplexity': 123.35911259718057}
{'epoch': 17, 'learning_rate': 0.002962962962962963, 'train_perplexity': 65.82516302400714, 'validate_perplexity': 119.6313305365875}
{'epoch': 18, 'learning_rate': 0.002962962962962963, 'train_perplexity': 64.85972790921144, 'validate_perplexity': 119.82721096696298}
{'epoch': 19, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 62.10326007865026, 'validate_perplexity': 118.99759455857846}
{'epoch': 20, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 61.66805751129365, 'validate_perplexity': 118.01565436959605}
{'epoch': 21, 'learning_rate': 0.0019753086419753087, 'train_perplexity': 60.488440347059175, 'validate_perplexity': 118.43857978773845}
{'epoch': 22, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 59.34791196877279, 'validate_perplexity': 117.08100201929548}
{'epoch': 23, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 58.72188922391251, 'validate_perplexity': 116.60969928059072}
{'epoch': 24, 'learning_rate': 0.0013168724279835392, 'train_perplexity': 58.27421986303218, 'validate_perplexity': 117.92092657185982}
{'epoch': 25, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 57.00866956393843, 'validate_perplexity': 116.29346500060687}
{'epoch': 26, 'learning_rate': 0.0008779149519890262, 'train_perplexity': 56.838454247774294, 'validate_perplexity': 116.50559978610275}
{'epoch': 27, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 56.11586737432916, 'validate_perplexity': 116.13863235520158}
{'epoch': 28, 'learning_rate': 0.0005852766346593508, 'train_perplexity': 55.830737114425524, 'validate_perplexity': 116.62410156386392}
{'epoch': 29, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 55.62896047656424, 'validate_perplexity': 115.5341959557656}
{'epoch': 30, 'learning_rate': 0.0003901844231062339, 'train_perplexity': 55.35934301445769, 'validate_perplexity': 116.25947725136756}
{'epoch': 31, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 55.38556183132391, 'validate_perplexity': 115.76095237237651}
{'epoch': 32, 'learning_rate': 0.00026012294873748923, 'train_perplexity': 55.11276143042505, 'validate_perplexity': 116.45383477587976}
{'epoch': 33, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 54.966865403988905, 'validate_perplexity': 116.03246357703037}
{'epoch': 34, 'learning_rate': 0.00017341529915832616, 'train_perplexity': 54.70823027394863, 'validate_perplexity': 116.34366083658237}
{'epoch': 35, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 54.47463943587658, 'validate_perplexity': 115.62232005244304}
{'epoch': 36, 'learning_rate': 0.00011561019943888411, 'train_perplexity': 54.52736929486217, 'validate_perplexity': 115.84632168024487}
{'epoch': 37, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 54.52070052934707, 'validate_perplexity': 115.60076505165775}
{'epoch': 38, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 54.25071131468163, 'validate_perplexity': 115.42549749203855}
{'epoch': 39, 'learning_rate': 7.70734662925894e-05, 'train_perplexity': 54.37306601738311, 'validate_perplexity': 116.12118921239261}
{'epoch': 40, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 54.242175297142595, 'validate_perplexity': 115.85267443496808}
{'epoch': 41, 'learning_rate': 5.138231086172627e-05, 'train_perplexity': 54.55820180988753, 'validate_perplexity': 115.92849243709816}
{'epoch': 42, 'learning_rate': 3.425487390781751e-05, 'train_perplexity': 53.980283024259656, 'validate_perplexity': 115.8737238766666}
{'epoch': 43, 'learning_rate': 2.2836582605211673e-05, 'train_perplexity': 54.15009766622437, 'validate_perplexity': 115.85129337169867}
{'epoch': 44, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 54.31593975968409, 'validate_perplexity': 115.69473237160534}
{'epoch': 45, 'learning_rate': 1.522438840347445e-05, 'train_perplexity': 54.21293022540412, 'validate_perplexity': 115.86731471784785}
{'epoch': 46, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 54.10999993989349, 'validate_perplexity': 115.70797334062196}
{'epoch': 47, 'learning_rate': 1.0149592268982966e-05, 'train_perplexity': 54.48002962547938, 'validate_perplexity': 115.83814647375708}
Perplexity on Test: 110.759346