-
Notifications
You must be signed in to change notification settings - Fork 0
/
training_log.txt
9708 lines (9708 loc) · 606 KB
/
training_log.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Epoch [1/3], Step [1/3236], Loss: 9.2222, Perplexity: 10119.8081
Epoch [1/3], Step [2/3236], Loss: 8.9599, Perplexity: 7784.4743
Epoch [1/3], Step [3/3236], Loss: 8.7379, Perplexity: 6234.9778
Epoch [1/3], Step [4/3236], Loss: 8.3448, Perplexity: 4208.3934
Epoch [1/3], Step [5/3236], Loss: 7.9231, Perplexity: 2760.3687
Epoch [1/3], Step [6/3236], Loss: 7.3448, Perplexity: 1548.1231
Epoch [1/3], Step [7/3236], Loss: 6.6534, Perplexity: 775.4096
Epoch [1/3], Step [8/3236], Loss: 5.8596, Perplexity: 350.5727
Epoch [1/3], Step [9/3236], Loss: 5.4005, Perplexity: 221.5211
Epoch [1/3], Step [10/3236], Loss: 5.1418, Perplexity: 171.0311
Epoch [1/3], Step [11/3236], Loss: 4.9463, Perplexity: 140.6581
Epoch [1/3], Step [12/3236], Loss: 4.7334, Perplexity: 113.6760
Epoch [1/3], Step [13/3236], Loss: 4.7012, Perplexity: 110.0836
Epoch [1/3], Step [14/3236], Loss: 4.6168, Perplexity: 101.1734
Epoch [1/3], Step [15/3236], Loss: 4.7380, Perplexity: 114.1999
Epoch [1/3], Step [16/3236], Loss: 4.6842, Perplexity: 108.2203
Epoch [1/3], Step [17/3236], Loss: 4.7004, Perplexity: 109.9936
Epoch [1/3], Step [18/3236], Loss: 4.6697, Perplexity: 106.6614
Epoch [1/3], Step [19/3236], Loss: 4.6944, Perplexity: 109.3329
Epoch [1/3], Step [20/3236], Loss: 4.6329, Perplexity: 102.8121
Epoch [1/3], Step [21/3236], Loss: 4.4227, Perplexity: 83.3241
Epoch [1/3], Step [22/3236], Loss: 4.3504, Perplexity: 77.5083
Epoch [1/3], Step [23/3236], Loss: 4.5945, Perplexity: 98.9370
Epoch [1/3], Step [24/3236], Loss: 4.5773, Perplexity: 97.2487
Epoch [1/3], Step [25/3236], Loss: 4.3762, Perplexity: 79.5368
Epoch [1/3], Step [26/3236], Loss: 4.2919, Perplexity: 73.1049
Epoch [1/3], Step [27/3236], Loss: 4.2987, Perplexity: 73.6025
Epoch [1/3], Step [28/3236], Loss: 4.3280, Perplexity: 75.7936
Epoch [1/3], Step [29/3236], Loss: 4.4594, Perplexity: 86.4350
Epoch [1/3], Step [30/3236], Loss: 4.2410, Perplexity: 69.4792
Epoch [1/3], Step [31/3236], Loss: 4.2691, Perplexity: 71.4567
Epoch [1/3], Step [32/3236], Loss: 4.1340, Perplexity: 62.4293
Epoch [1/3], Step [33/3236], Loss: 4.3162, Perplexity: 74.9069
Epoch [1/3], Step [34/3236], Loss: 4.3710, Perplexity: 79.1212
Epoch [1/3], Step [35/3236], Loss: 4.1243, Perplexity: 61.8220
Epoch [1/3], Step [36/3236], Loss: 4.3204, Perplexity: 75.2174
Epoch [1/3], Step [37/3236], Loss: 4.3087, Perplexity: 74.3420
Epoch [1/3], Step [38/3236], Loss: 4.0647, Perplexity: 58.2451
Epoch [1/3], Step [39/3236], Loss: 4.0999, Perplexity: 60.3326
Epoch [1/3], Step [40/3236], Loss: 4.0279, Perplexity: 56.1410
Epoch [1/3], Step [41/3236], Loss: 4.0969, Perplexity: 60.1524
Epoch [1/3], Step [42/3236], Loss: 4.0332, Perplexity: 56.4432
Epoch [1/3], Step [43/3236], Loss: 4.0432, Perplexity: 57.0067
Epoch [1/3], Step [44/3236], Loss: 4.0604, Perplexity: 57.9948
Epoch [1/3], Step [45/3236], Loss: 4.1130, Perplexity: 61.1286
Epoch [1/3], Step [46/3236], Loss: 4.3886, Perplexity: 80.5274
Epoch [1/3], Step [47/3236], Loss: 4.0479, Perplexity: 57.2796
Epoch [1/3], Step [48/3236], Loss: 4.1846, Perplexity: 65.6697
Epoch [1/3], Step [49/3236], Loss: 4.0579, Perplexity: 57.8551
Epoch [1/3], Step [50/3236], Loss: 4.1244, Perplexity: 61.8310
Epoch [1/3], Step [51/3236], Loss: 3.9409, Perplexity: 51.4637
Epoch [1/3], Step [52/3236], Loss: 4.0012, Perplexity: 54.6647
Epoch [1/3], Step [53/3236], Loss: 4.0632, Perplexity: 58.1615
Epoch [1/3], Step [54/3236], Loss: 4.0012, Perplexity: 54.6624
Epoch [1/3], Step [55/3236], Loss: 3.9227, Perplexity: 50.5388
Epoch [1/3], Step [56/3236], Loss: 4.8273, Perplexity: 124.8763
Epoch [1/3], Step [57/3236], Loss: 3.8718, Perplexity: 48.0287
Epoch [1/3], Step [58/3236], Loss: 3.8701, Perplexity: 47.9464
Epoch [1/3], Step [59/3236], Loss: 3.8084, Perplexity: 45.0766
Epoch [1/3], Step [60/3236], Loss: 4.0130, Perplexity: 55.3135
Epoch [1/3], Step [61/3236], Loss: 3.9273, Perplexity: 50.7707
Epoch [1/3], Step [62/3236], Loss: 3.8100, Perplexity: 45.1510
Epoch [1/3], Step [63/3236], Loss: 3.7824, Perplexity: 43.9196
Epoch [1/3], Step [64/3236], Loss: 3.8291, Perplexity: 46.0209
Epoch [1/3], Step [65/3236], Loss: 3.9290, Perplexity: 50.8566
Epoch [1/3], Step [66/3236], Loss: 4.1535, Perplexity: 63.6569
Epoch [1/3], Step [67/3236], Loss: 3.7226, Perplexity: 41.3729
Epoch [1/3], Step [68/3236], Loss: 3.9497, Perplexity: 51.9172
Epoch [1/3], Step [69/3236], Loss: 3.8006, Perplexity: 44.7295
Epoch [1/3], Step [70/3236], Loss: 3.7500, Perplexity: 42.5225
Epoch [1/3], Step [71/3236], Loss: 3.7877, Perplexity: 44.1562
Epoch [1/3], Step [72/3236], Loss: 3.8163, Perplexity: 45.4342
Epoch [1/3], Step [73/3236], Loss: 3.7098, Perplexity: 40.8470
Epoch [1/3], Step [74/3236], Loss: 3.6285, Perplexity: 37.6562
Epoch [1/3], Step [75/3236], Loss: 3.7639, Perplexity: 43.1173
Epoch [1/3], Step [76/3236], Loss: 3.8892, Perplexity: 48.8696
Epoch [1/3], Step [77/3236], Loss: 3.9653, Perplexity: 52.7346
Epoch [1/3], Step [78/3236], Loss: 3.7310, Perplexity: 41.7204
Epoch [1/3], Step [79/3236], Loss: 3.6447, Perplexity: 38.2715
Epoch [1/3], Step [80/3236], Loss: 5.2081, Perplexity: 182.7526
Epoch [1/3], Step [81/3236], Loss: 3.7047, Perplexity: 40.6392
Epoch [1/3], Step [82/3236], Loss: 3.6895, Perplexity: 40.0266
Epoch [1/3], Step [83/3236], Loss: 3.7342, Perplexity: 41.8556
Epoch [1/3], Step [84/3236], Loss: 3.7245, Perplexity: 41.4513
Epoch [1/3], Step [85/3236], Loss: 3.8082, Perplexity: 45.0689
Epoch [1/3], Step [86/3236], Loss: 3.7638, Perplexity: 43.1109
Epoch [1/3], Step [87/3236], Loss: 3.7813, Perplexity: 43.8742
Epoch [1/3], Step [88/3236], Loss: 3.7554, Perplexity: 42.7501
Epoch [1/3], Step [89/3236], Loss: 3.5927, Perplexity: 36.3318
Epoch [1/3], Step [90/3236], Loss: 3.6556, Perplexity: 38.6890
Epoch [1/3], Step [91/3236], Loss: 3.5765, Perplexity: 35.7466
Epoch [1/3], Step [92/3236], Loss: 3.8339, Perplexity: 46.2407
Epoch [1/3], Step [93/3236], Loss: 3.5202, Perplexity: 33.7929
Epoch [1/3], Step [94/3236], Loss: 3.6599, Perplexity: 38.8585
Epoch [1/3], Step [95/3236], Loss: 3.7432, Perplexity: 42.2328
Epoch [1/3], Step [96/3236], Loss: 3.7462, Perplexity: 42.3590
Epoch [1/3], Step [97/3236], Loss: 3.7054, Perplexity: 40.6666
Epoch [1/3], Step [98/3236], Loss: 3.4217, Perplexity: 30.6226
Epoch [1/3], Step [99/3236], Loss: 3.6837, Perplexity: 39.7921
Epoch [1/3], Step [100/3236], Loss: 4.0734, Perplexity: 58.7584
Epoch [1/3], Step [101/3236], Loss: 3.6107, Perplexity: 36.9928
Epoch [1/3], Step [102/3236], Loss: 3.6247, Perplexity: 37.5117
Epoch [1/3], Step [103/3236], Loss: 3.6528, Perplexity: 38.5807
Epoch [1/3], Step [104/3236], Loss: 3.6431, Perplexity: 38.2119
Epoch [1/3], Step [105/3236], Loss: 3.5513, Perplexity: 34.8584
Epoch [1/3], Step [106/3236], Loss: 3.6161, Perplexity: 37.1907
Epoch [1/3], Step [107/3236], Loss: 3.6272, Perplexity: 37.6061
Epoch [1/3], Step [108/3236], Loss: 3.5055, Perplexity: 33.2976
Epoch [1/3], Step [109/3236], Loss: 3.5031, Perplexity: 33.2179
Epoch [1/3], Step [110/3236], Loss: 3.5878, Perplexity: 36.1539
Epoch [1/3], Step [111/3236], Loss: 3.5627, Perplexity: 35.2590
Epoch [1/3], Step [112/3236], Loss: 3.5829, Perplexity: 35.9766
Epoch [1/3], Step [113/3236], Loss: 3.5030, Perplexity: 33.2147
Epoch [1/3], Step [114/3236], Loss: 3.6102, Perplexity: 36.9734
Epoch [1/3], Step [115/3236], Loss: 3.5452, Perplexity: 34.6481
Epoch [1/3], Step [116/3236], Loss: 3.4900, Perplexity: 32.7862
Epoch [1/3], Step [117/3236], Loss: 3.5004, Perplexity: 33.1288
Epoch [1/3], Step [118/3236], Loss: 3.3593, Perplexity: 28.7702
Epoch [1/3], Step [119/3236], Loss: 3.3908, Perplexity: 29.6902
Epoch [1/3], Step [120/3236], Loss: 3.6826, Perplexity: 39.7498
Epoch [1/3], Step [121/3236], Loss: 4.1208, Perplexity: 61.6099
Epoch [1/3], Step [122/3236], Loss: 3.7408, Perplexity: 42.1299
Epoch [1/3], Step [123/3236], Loss: 3.4818, Perplexity: 32.5168
Epoch [1/3], Step [124/3236], Loss: 3.6786, Perplexity: 39.5905
Epoch [1/3], Step [125/3236], Loss: 4.3509, Perplexity: 77.5481
Epoch [1/3], Step [126/3236], Loss: 3.4851, Perplexity: 32.6265
Epoch [1/3], Step [127/3236], Loss: 3.5109, Perplexity: 33.4780
Epoch [1/3], Step [128/3236], Loss: 3.5351, Perplexity: 34.2990
Epoch [1/3], Step [129/3236], Loss: 3.3898, Perplexity: 29.6595
Epoch [1/3], Step [130/3236], Loss: 3.4621, Perplexity: 31.8850
Epoch [1/3], Step [131/3236], Loss: 3.5279, Perplexity: 34.0512
Epoch [1/3], Step [132/3236], Loss: 3.4377, Perplexity: 31.1165
Epoch [1/3], Step [133/3236], Loss: 3.5236, Perplexity: 33.9047
Epoch [1/3], Step [134/3236], Loss: 3.5730, Perplexity: 35.6249
Epoch [1/3], Step [135/3236], Loss: 3.4620, Perplexity: 31.8822
Epoch [1/3], Step [136/3236], Loss: 3.5098, Perplexity: 33.4414
Epoch [1/3], Step [137/3236], Loss: 3.4746, Perplexity: 32.2841
Epoch [1/3], Step [138/3236], Loss: 3.3975, Perplexity: 29.8886
Epoch [1/3], Step [139/3236], Loss: 3.5668, Perplexity: 35.4029
Epoch [1/3], Step [140/3236], Loss: 4.3918, Perplexity: 80.7872
Epoch [1/3], Step [141/3236], Loss: 3.3861, Perplexity: 29.5491
Epoch [1/3], Step [142/3236], Loss: 3.7855, Perplexity: 44.0574
Epoch [1/3], Step [143/3236], Loss: 3.5224, Perplexity: 33.8660
Epoch [1/3], Step [144/3236], Loss: 3.4769, Perplexity: 32.3588
Epoch [1/3], Step [145/3236], Loss: 3.4525, Perplexity: 31.5798
Epoch [1/3], Step [146/3236], Loss: 3.4971, Perplexity: 33.0191
Epoch [1/3], Step [147/3236], Loss: 3.3873, Perplexity: 29.5874
Epoch [1/3], Step [148/3236], Loss: 3.2972, Perplexity: 27.0376
Epoch [1/3], Step [149/3236], Loss: 3.4045, Perplexity: 30.0999
Epoch [1/3], Step [150/3236], Loss: 3.3088, Perplexity: 27.3520
Epoch [1/3], Step [151/3236], Loss: 4.2604, Perplexity: 70.8404
Epoch [1/3], Step [152/3236], Loss: 3.3812, Perplexity: 29.4067
Epoch [1/3], Step [153/3236], Loss: 3.3482, Perplexity: 28.4528
Epoch [1/3], Step [154/3236], Loss: 3.3064, Perplexity: 27.2868
Epoch [1/3], Step [155/3236], Loss: 3.4883, Perplexity: 32.7319
Epoch [1/3], Step [156/3236], Loss: 3.4992, Perplexity: 33.0876
Epoch [1/3], Step [157/3236], Loss: 3.3791, Perplexity: 29.3431
Epoch [1/3], Step [158/3236], Loss: 3.9048, Perplexity: 49.6393
Epoch [1/3], Step [159/3236], Loss: 3.4104, Perplexity: 30.2782
Epoch [1/3], Step [160/3236], Loss: 3.4439, Perplexity: 31.3083
Epoch [1/3], Step [161/3236], Loss: 3.3780, Perplexity: 29.3128
Epoch [1/3], Step [162/3236], Loss: 3.6665, Perplexity: 39.1156
Epoch [1/3], Step [163/3236], Loss: 4.1140, Perplexity: 61.1912
Epoch [1/3], Step [164/3236], Loss: 3.5583, Perplexity: 35.1052
Epoch [1/3], Step [165/3236], Loss: 4.2100, Perplexity: 67.3588
Epoch [1/3], Step [166/3236], Loss: 3.3606, Perplexity: 28.8076
Epoch [1/3], Step [167/3236], Loss: 3.3136, Perplexity: 27.4847
Epoch [1/3], Step [168/3236], Loss: 3.2885, Perplexity: 26.8035
Epoch [1/3], Step [169/3236], Loss: 3.1875, Perplexity: 24.2268
Epoch [1/3], Step [170/3236], Loss: 3.4357, Perplexity: 31.0528
Epoch [1/3], Step [171/3236], Loss: 3.1591, Perplexity: 23.5486
Epoch [1/3], Step [172/3236], Loss: 3.3397, Perplexity: 28.2117
Epoch [1/3], Step [173/3236], Loss: 3.2903, Perplexity: 26.8518
Epoch [1/3], Step [174/3236], Loss: 3.3378, Perplexity: 28.1580
Epoch [1/3], Step [175/3236], Loss: 3.3338, Perplexity: 28.0436
Epoch [1/3], Step [176/3236], Loss: 3.2755, Perplexity: 26.4557
Epoch [1/3], Step [177/3236], Loss: 3.3880, Perplexity: 29.6058
Epoch [1/3], Step [178/3236], Loss: 3.3959, Perplexity: 29.8407
Epoch [1/3], Step [179/3236], Loss: 3.7205, Perplexity: 41.2835
Epoch [1/3], Step [180/3236], Loss: 3.2629, Perplexity: 26.1259
Epoch [1/3], Step [181/3236], Loss: 3.3516, Perplexity: 28.5494
Epoch [1/3], Step [182/3236], Loss: 3.4118, Perplexity: 30.3199
Epoch [1/3], Step [183/3236], Loss: 3.2706, Perplexity: 26.3281
Epoch [1/3], Step [184/3236], Loss: 3.4328, Perplexity: 30.9625
Epoch [1/3], Step [185/3236], Loss: 3.4364, Perplexity: 31.0742
Epoch [1/3], Step [186/3236], Loss: 3.3656, Perplexity: 28.9509
Epoch [1/3], Step [187/3236], Loss: 3.2897, Perplexity: 26.8348
Epoch [1/3], Step [188/3236], Loss: 3.3451, Perplexity: 28.3646
Epoch [1/3], Step [189/3236], Loss: 3.3045, Perplexity: 27.2337
Epoch [1/3], Step [190/3236], Loss: 3.5584, Perplexity: 35.1079
Epoch [1/3], Step [191/3236], Loss: 3.4016, Perplexity: 30.0135
Epoch [1/3], Step [192/3236], Loss: 3.3696, Perplexity: 29.0679
Epoch [1/3], Step [193/3236], Loss: 3.4726, Perplexity: 32.2188
Epoch [1/3], Step [194/3236], Loss: 3.4698, Perplexity: 32.1314
Epoch [1/3], Step [195/3236], Loss: 3.8899, Perplexity: 48.9084
Epoch [1/3], Step [196/3236], Loss: 3.2921, Perplexity: 26.8996
Epoch [1/3], Step [197/3236], Loss: 3.2313, Perplexity: 25.3137
Epoch [1/3], Step [198/3236], Loss: 3.1702, Perplexity: 23.8133
Epoch [1/3], Step [199/3236], Loss: 3.2872, Perplexity: 26.7686
Epoch [1/3], Step [200/3236], Loss: 3.4223, Perplexity: 30.6390
Epoch [1/3], Step [201/3236], Loss: 3.1055, Perplexity: 22.3208
Epoch [1/3], Step [202/3236], Loss: 3.3125, Perplexity: 27.4546
Epoch [1/3], Step [203/3236], Loss: 3.6019, Perplexity: 36.6675
Epoch [1/3], Step [204/3236], Loss: 3.7654, Perplexity: 43.1803
Epoch [1/3], Step [205/3236], Loss: 3.3089, Perplexity: 27.3551
Epoch [1/3], Step [206/3236], Loss: 3.3180, Perplexity: 27.6043
Epoch [1/3], Step [207/3236], Loss: 3.2032, Perplexity: 24.6118
Epoch [1/3], Step [208/3236], Loss: 3.3494, Perplexity: 28.4844
Epoch [1/3], Step [209/3236], Loss: 3.2449, Perplexity: 25.6586
Epoch [1/3], Step [210/3236], Loss: 3.2040, Perplexity: 24.6309
Epoch [1/3], Step [211/3236], Loss: 3.0143, Perplexity: 20.3751
Epoch [1/3], Step [212/3236], Loss: 3.2350, Perplexity: 25.4075
Epoch [1/3], Step [213/3236], Loss: 3.2449, Perplexity: 25.6597
Epoch [1/3], Step [214/3236], Loss: 3.2603, Perplexity: 26.0585
Epoch [1/3], Step [215/3236], Loss: 3.2999, Perplexity: 27.1095
Epoch [1/3], Step [216/3236], Loss: 3.1772, Perplexity: 23.9802
Epoch [1/3], Step [217/3236], Loss: 3.2919, Perplexity: 26.8935
Epoch [1/3], Step [218/3236], Loss: 3.1516, Perplexity: 23.3736
Epoch [1/3], Step [219/3236], Loss: 3.2176, Perplexity: 24.9672
Epoch [1/3], Step [220/3236], Loss: 3.3398, Perplexity: 28.2146
Epoch [1/3], Step [221/3236], Loss: 3.2541, Perplexity: 25.8976
Epoch [1/3], Step [222/3236], Loss: 3.1593, Perplexity: 23.5542
Epoch [1/3], Step [223/3236], Loss: 3.3378, Perplexity: 28.1583
Epoch [1/3], Step [224/3236], Loss: 3.3465, Perplexity: 28.4039
Epoch [1/3], Step [225/3236], Loss: 3.1215, Perplexity: 22.6806
Epoch [1/3], Step [226/3236], Loss: 3.2810, Perplexity: 26.6013
Epoch [1/3], Step [227/3236], Loss: 3.2150, Perplexity: 24.9041
Epoch [1/3], Step [228/3236], Loss: 3.1048, Perplexity: 22.3051
Epoch [1/3], Step [229/3236], Loss: 3.3622, Perplexity: 28.8520
Epoch [1/3], Step [230/3236], Loss: 3.7003, Perplexity: 40.4599
Epoch [1/3], Step [231/3236], Loss: 3.3874, Perplexity: 29.5891
Epoch [1/3], Step [232/3236], Loss: 3.1836, Perplexity: 24.1346
Epoch [1/3], Step [233/3236], Loss: 3.5456, Perplexity: 34.6588
Epoch [1/3], Step [234/3236], Loss: 3.3345, Perplexity: 28.0644
Epoch [1/3], Step [235/3236], Loss: 3.3415, Perplexity: 28.2604
Epoch [1/3], Step [236/3236], Loss: 3.9304, Perplexity: 50.9277
Epoch [1/3], Step [237/3236], Loss: 3.2307, Perplexity: 25.2986
Epoch [1/3], Step [238/3236], Loss: 3.1415, Perplexity: 23.1386
Epoch [1/3], Step [239/3236], Loss: 3.3464, Perplexity: 28.4013
Epoch [1/3], Step [240/3236], Loss: 3.3045, Perplexity: 27.2347
Epoch [1/3], Step [241/3236], Loss: 3.4432, Perplexity: 31.2884
Epoch [1/3], Step [242/3236], Loss: 3.3766, Perplexity: 29.2715
Epoch [1/3], Step [243/3236], Loss: 3.3236, Perplexity: 27.7611
Epoch [1/3], Step [244/3236], Loss: 3.2664, Perplexity: 26.2174
Epoch [1/3], Step [245/3236], Loss: 3.1576, Perplexity: 23.5135
Epoch [1/3], Step [246/3236], Loss: 3.3996, Perplexity: 29.9521
Epoch [1/3], Step [247/3236], Loss: 3.3228, Perplexity: 27.7365
Epoch [1/3], Step [248/3236], Loss: 3.3144, Perplexity: 27.5050
Epoch [1/3], Step [249/3236], Loss: 3.2473, Perplexity: 25.7206
Epoch [1/3], Step [250/3236], Loss: 3.4387, Perplexity: 31.1461
Epoch [1/3], Step [251/3236], Loss: 3.2201, Perplexity: 25.0296
Epoch [1/3], Step [252/3236], Loss: 3.3941, Perplexity: 29.7871
Epoch [1/3], Step [253/3236], Loss: 3.3466, Perplexity: 28.4048
Epoch [1/3], Step [254/3236], Loss: 3.2583, Perplexity: 26.0048
Epoch [1/3], Step [255/3236], Loss: 3.3008, Perplexity: 27.1332
Epoch [1/3], Step [256/3236], Loss: 3.2708, Perplexity: 26.3314
Epoch [1/3], Step [257/3236], Loss: 3.2883, Perplexity: 26.7966
Epoch [1/3], Step [258/3236], Loss: 3.2280, Perplexity: 25.2290
Epoch [1/3], Step [259/3236], Loss: 3.1932, Perplexity: 24.3660
Epoch [1/3], Step [260/3236], Loss: 3.9703, Perplexity: 53.0010
Epoch [1/3], Step [261/3236], Loss: 3.5272, Perplexity: 34.0302
Epoch [1/3], Step [262/3236], Loss: 3.2423, Perplexity: 25.5921
Epoch [1/3], Step [263/3236], Loss: 3.1567, Perplexity: 23.4938
Epoch [1/3], Step [264/3236], Loss: 3.3118, Perplexity: 27.4354
Epoch [1/3], Step [265/3236], Loss: 3.2465, Perplexity: 25.7000
Epoch [1/3], Step [266/3236], Loss: 3.2187, Perplexity: 24.9966
Epoch [1/3], Step [267/3236], Loss: 3.0987, Perplexity: 22.1696
Epoch [1/3], Step [268/3236], Loss: 3.1849, Perplexity: 24.1637
Epoch [1/3], Step [269/3236], Loss: 3.0904, Perplexity: 21.9855
Epoch [1/3], Step [270/3236], Loss: 3.1997, Perplexity: 24.5260
Epoch [1/3], Step [271/3236], Loss: 3.0549, Perplexity: 21.2183
Epoch [1/3], Step [272/3236], Loss: 3.2731, Perplexity: 26.3917
Epoch [1/3], Step [273/3236], Loss: 3.3766, Perplexity: 29.2713
Epoch [1/3], Step [274/3236], Loss: 3.1429, Perplexity: 23.1699
Epoch [1/3], Step [275/3236], Loss: 3.2328, Perplexity: 25.3500
Epoch [1/3], Step [276/3236], Loss: 3.3017, Perplexity: 27.1583
Epoch [1/3], Step [277/3236], Loss: 3.0633, Perplexity: 21.3985
Epoch [1/3], Step [278/3236], Loss: 3.1024, Perplexity: 22.2510
Epoch [1/3], Step [279/3236], Loss: 3.4813, Perplexity: 32.5011
Epoch [1/3], Step [280/3236], Loss: 3.1071, Perplexity: 22.3556
Epoch [1/3], Step [281/3236], Loss: 3.6083, Perplexity: 36.9024
Epoch [1/3], Step [282/3236], Loss: 3.2219, Perplexity: 25.0748
Epoch [1/3], Step [283/3236], Loss: 2.9989, Perplexity: 20.0632
Epoch [1/3], Step [284/3236], Loss: 3.1311, Perplexity: 22.8985
Epoch [1/3], Step [285/3236], Loss: 3.2457, Perplexity: 25.6795
Epoch [1/3], Step [286/3236], Loss: 3.1413, Perplexity: 23.1347
Epoch [1/3], Step [287/3236], Loss: 3.1130, Perplexity: 22.4878
Epoch [1/3], Step [288/3236], Loss: 3.0904, Perplexity: 21.9866
Epoch [1/3], Step [289/3236], Loss: 3.2094, Perplexity: 24.7649
Epoch [1/3], Step [290/3236], Loss: 3.8441, Perplexity: 46.7157
Epoch [1/3], Step [291/3236], Loss: 3.0945, Perplexity: 22.0770
Epoch [1/3], Step [292/3236], Loss: 3.5393, Perplexity: 34.4427
Epoch [1/3], Step [293/3236], Loss: 3.1406, Perplexity: 23.1170
Epoch [1/3], Step [294/3236], Loss: 3.1708, Perplexity: 23.8273
Epoch [1/3], Step [295/3236], Loss: 3.8164, Perplexity: 45.4388
Epoch [1/3], Step [296/3236], Loss: 3.5390, Perplexity: 34.4320
Epoch [1/3], Step [297/3236], Loss: 3.1443, Perplexity: 23.2039
Epoch [1/3], Step [298/3236], Loss: 3.1649, Perplexity: 23.6855
Epoch [1/3], Step [299/3236], Loss: 3.5703, Perplexity: 35.5279
Epoch [1/3], Step [300/3236], Loss: 3.2764, Perplexity: 26.4809
Epoch [1/3], Step [301/3236], Loss: 3.2912, Perplexity: 26.8753
Epoch [1/3], Step [302/3236], Loss: 3.1076, Perplexity: 22.3668
Epoch [1/3], Step [303/3236], Loss: 2.9906, Perplexity: 19.8976
Epoch [1/3], Step [304/3236], Loss: 3.1813, Perplexity: 24.0787
Epoch [1/3], Step [305/3236], Loss: 3.1813, Perplexity: 24.0791
Epoch [1/3], Step [306/3236], Loss: 3.2225, Perplexity: 25.0904
Epoch [1/3], Step [307/3236], Loss: 3.2136, Perplexity: 24.8680
Epoch [1/3], Step [308/3236], Loss: 3.0412, Perplexity: 20.9307
Epoch [1/3], Step [309/3236], Loss: 2.9787, Perplexity: 19.6632
Epoch [1/3], Step [310/3236], Loss: 3.1473, Perplexity: 23.2722
Epoch [1/3], Step [311/3236], Loss: 3.1377, Perplexity: 23.0497
Epoch [1/3], Step [312/3236], Loss: 3.2325, Perplexity: 25.3435
Epoch [1/3], Step [313/3236], Loss: 3.0006, Perplexity: 20.0984
Epoch [1/3], Step [314/3236], Loss: 3.0986, Perplexity: 22.1659
Epoch [1/3], Step [315/3236], Loss: 2.9621, Perplexity: 19.3379
Epoch [1/3], Step [316/3236], Loss: 3.0701, Perplexity: 21.5439
Epoch [1/3], Step [317/3236], Loss: 3.1670, Perplexity: 23.7372
Epoch [1/3], Step [318/3236], Loss: 2.9591, Perplexity: 19.2800
Epoch [1/3], Step [319/3236], Loss: 3.1697, Perplexity: 23.8004
Epoch [1/3], Step [320/3236], Loss: 3.1488, Perplexity: 23.3085
Epoch [1/3], Step [321/3236], Loss: 3.0396, Perplexity: 20.8964
Epoch [1/3], Step [322/3236], Loss: 2.9924, Perplexity: 19.9332
Epoch [1/3], Step [323/3236], Loss: 3.1533, Perplexity: 23.4135
Epoch [1/3], Step [324/3236], Loss: 3.0720, Perplexity: 21.5845
Epoch [1/3], Step [325/3236], Loss: 3.1162, Perplexity: 22.5615
Epoch [1/3], Step [326/3236], Loss: 3.1142, Perplexity: 22.5149
Epoch [1/3], Step [327/3236], Loss: 3.0932, Perplexity: 22.0467
Epoch [1/3], Step [328/3236], Loss: 2.9953, Perplexity: 19.9917
Epoch [1/3], Step [329/3236], Loss: 3.2726, Perplexity: 26.3806
Epoch [1/3], Step [330/3236], Loss: 2.9881, Perplexity: 19.8480
Epoch [1/3], Step [331/3236], Loss: 3.1748, Perplexity: 23.9218
Epoch [1/3], Step [332/3236], Loss: 3.1093, Perplexity: 22.4050
Epoch [1/3], Step [333/3236], Loss: 2.8848, Perplexity: 17.8993
Epoch [1/3], Step [334/3236], Loss: 3.3301, Perplexity: 27.9411
Epoch [1/3], Step [335/3236], Loss: 3.1873, Perplexity: 24.2238
Epoch [1/3], Step [336/3236], Loss: 3.1346, Perplexity: 22.9791
Epoch [1/3], Step [337/3236], Loss: 3.0102, Perplexity: 20.2917
Epoch [1/3], Step [338/3236], Loss: 3.0564, Perplexity: 21.2510
Epoch [1/3], Step [339/3236], Loss: 3.2870, Perplexity: 26.7623
Epoch [1/3], Step [340/3236], Loss: 3.1972, Perplexity: 24.4640
Epoch [1/3], Step [341/3236], Loss: 3.0542, Perplexity: 21.2051
Epoch [1/3], Step [342/3236], Loss: 3.1512, Perplexity: 23.3638
Epoch [1/3], Step [343/3236], Loss: 3.0635, Perplexity: 21.4023
Epoch [1/3], Step [344/3236], Loss: 3.0224, Perplexity: 20.5402
Epoch [1/3], Step [345/3236], Loss: 3.0837, Perplexity: 21.8399
Epoch [1/3], Step [346/3236], Loss: 3.2817, Perplexity: 26.6220
Epoch [1/3], Step [347/3236], Loss: 3.1689, Perplexity: 23.7813
Epoch [1/3], Step [348/3236], Loss: 3.5251, Perplexity: 33.9578
Epoch [1/3], Step [349/3236], Loss: 2.9760, Perplexity: 19.6102
Epoch [1/3], Step [350/3236], Loss: 3.4850, Perplexity: 32.6209
Epoch [1/3], Step [351/3236], Loss: 3.0942, Perplexity: 22.0704
Epoch [1/3], Step [352/3236], Loss: 2.9755, Perplexity: 19.5996
Epoch [1/3], Step [353/3236], Loss: 3.1597, Perplexity: 23.5646
Epoch [1/3], Step [354/3236], Loss: 2.9332, Perplexity: 18.7885
Epoch [1/3], Step [355/3236], Loss: 3.0455, Perplexity: 21.0198
Epoch [1/3], Step [356/3236], Loss: 3.0320, Perplexity: 20.7394
Epoch [1/3], Step [357/3236], Loss: 2.8812, Perplexity: 17.8355
Epoch [1/3], Step [358/3236], Loss: 3.1689, Perplexity: 23.7809
Epoch [1/3], Step [359/3236], Loss: 3.0649, Perplexity: 21.4313
Epoch [1/3], Step [360/3236], Loss: 3.0742, Perplexity: 21.6316
Epoch [1/3], Step [361/3236], Loss: 3.1322, Perplexity: 22.9239
Epoch [1/3], Step [362/3236], Loss: 2.8585, Perplexity: 17.4346
Epoch [1/3], Step [363/3236], Loss: 2.9946, Perplexity: 19.9774
Epoch [1/3], Step [364/3236], Loss: 2.9844, Perplexity: 19.7749
Epoch [1/3], Step [365/3236], Loss: 2.9412, Perplexity: 18.9392
Epoch [1/3], Step [366/3236], Loss: 3.0697, Perplexity: 21.5355
Epoch [1/3], Step [367/3236], Loss: 3.3203, Perplexity: 27.6686
Epoch [1/3], Step [368/3236], Loss: 2.9626, Perplexity: 19.3485
Epoch [1/3], Step [369/3236], Loss: 2.9307, Perplexity: 18.7411
Epoch [1/3], Step [370/3236], Loss: 2.8831, Perplexity: 17.8693
Epoch [1/3], Step [371/3236], Loss: 3.4218, Perplexity: 30.6244
Epoch [1/3], Step [372/3236], Loss: 2.9633, Perplexity: 19.3620
Epoch [1/3], Step [373/3236], Loss: 3.0547, Perplexity: 21.2138
Epoch [1/3], Step [374/3236], Loss: 3.0307, Perplexity: 20.7116
Epoch [1/3], Step [375/3236], Loss: 2.9464, Perplexity: 19.0366
Epoch [1/3], Step [376/3236], Loss: 3.5227, Perplexity: 33.8762
Epoch [1/3], Step [377/3236], Loss: 3.1139, Perplexity: 22.5092
Epoch [1/3], Step [378/3236], Loss: 3.1473, Perplexity: 23.2726
Epoch [1/3], Step [379/3236], Loss: 3.0862, Perplexity: 21.8945
Epoch [1/3], Step [380/3236], Loss: 3.0543, Perplexity: 21.2060
Epoch [1/3], Step [381/3236], Loss: 3.2411, Perplexity: 25.5617
Epoch [1/3], Step [382/3236], Loss: 3.1065, Perplexity: 22.3421
Epoch [1/3], Step [383/3236], Loss: 3.3203, Perplexity: 27.6692
Epoch [1/3], Step [384/3236], Loss: 2.9956, Perplexity: 19.9967
Epoch [1/3], Step [385/3236], Loss: 3.0101, Perplexity: 20.2895
Epoch [1/3], Step [386/3236], Loss: 3.2336, Perplexity: 25.3705
Epoch [1/3], Step [387/3236], Loss: 2.8710, Perplexity: 17.6547
Epoch [1/3], Step [388/3236], Loss: 3.8961, Perplexity: 49.2091
Epoch [1/3], Step [389/3236], Loss: 2.9318, Perplexity: 18.7609
Epoch [1/3], Step [390/3236], Loss: 2.8934, Perplexity: 18.0547
Epoch [1/3], Step [391/3236], Loss: 2.8675, Perplexity: 17.5928
Epoch [1/3], Step [392/3236], Loss: 3.0710, Perplexity: 21.5638
Epoch [1/3], Step [393/3236], Loss: 3.2544, Perplexity: 25.9029
Epoch [1/3], Step [394/3236], Loss: 2.9994, Perplexity: 20.0735
Epoch [1/3], Step [395/3236], Loss: 3.0988, Perplexity: 22.1713
Epoch [1/3], Step [396/3236], Loss: 2.9520, Perplexity: 19.1447
Epoch [1/3], Step [397/3236], Loss: 3.3476, Perplexity: 28.4348
Epoch [1/3], Step [398/3236], Loss: 3.4963, Perplexity: 32.9926
Epoch [1/3], Step [399/3236], Loss: 2.9308, Perplexity: 18.7417
Epoch [1/3], Step [400/3236], Loss: 3.0443, Perplexity: 20.9956
Epoch [1/3], Step [401/3236], Loss: 2.9871, Perplexity: 19.8276
Epoch [1/3], Step [402/3236], Loss: 3.3739, Perplexity: 29.1914
Epoch [1/3], Step [403/3236], Loss: 3.0431, Perplexity: 20.9703
Epoch [1/3], Step [404/3236], Loss: 3.0511, Perplexity: 21.1392
Epoch [1/3], Step [405/3236], Loss: 3.3794, Perplexity: 29.3524
Epoch [1/3], Step [406/3236], Loss: 3.1211, Perplexity: 22.6720
Epoch [1/3], Step [407/3236], Loss: 2.9331, Perplexity: 18.7864
Epoch [1/3], Step [408/3236], Loss: 2.9510, Perplexity: 19.1244
Epoch [1/3], Step [409/3236], Loss: 3.1346, Perplexity: 22.9798
Epoch [1/3], Step [410/3236], Loss: 2.8599, Perplexity: 17.4598
Epoch [1/3], Step [411/3236], Loss: 3.1860, Perplexity: 24.1925
Epoch [1/3], Step [412/3236], Loss: 3.0992, Perplexity: 22.1792
Epoch [1/3], Step [413/3236], Loss: 3.1057, Perplexity: 22.3240
Epoch [1/3], Step [414/3236], Loss: 2.9839, Perplexity: 19.7656
Epoch [1/3], Step [415/3236], Loss: 2.9370, Perplexity: 18.8593
Epoch [1/3], Step [416/3236], Loss: 2.8320, Perplexity: 16.9793
Epoch [1/3], Step [417/3236], Loss: 3.4646, Perplexity: 31.9621
Epoch [1/3], Step [418/3236], Loss: 3.3896, Perplexity: 29.6527
Epoch [1/3], Step [419/3236], Loss: 2.9985, Perplexity: 20.0561
Epoch [1/3], Step [420/3236], Loss: 3.1398, Perplexity: 23.0996
Epoch [1/3], Step [421/3236], Loss: 3.2082, Perplexity: 24.7343
Epoch [1/3], Step [422/3236], Loss: 3.8626, Perplexity: 47.5888
Epoch [1/3], Step [423/3236], Loss: 3.4046, Perplexity: 30.1036
Epoch [1/3], Step [424/3236], Loss: 3.0389, Perplexity: 20.8822
Epoch [1/3], Step [425/3236], Loss: 3.0893, Perplexity: 21.9616
Epoch [1/3], Step [426/3236], Loss: 2.9920, Perplexity: 19.9254
Epoch [1/3], Step [427/3236], Loss: 3.2103, Perplexity: 24.7858
Epoch [1/3], Step [428/3236], Loss: 2.8981, Perplexity: 18.1395
Epoch [1/3], Step [429/3236], Loss: 3.0434, Perplexity: 20.9772
Epoch [1/3], Step [430/3236], Loss: 3.0572, Perplexity: 21.2688
Epoch [1/3], Step [431/3236], Loss: 3.0579, Perplexity: 21.2825
Epoch [1/3], Step [432/3236], Loss: 3.0564, Perplexity: 21.2515
Epoch [1/3], Step [433/3236], Loss: 2.9007, Perplexity: 18.1866
Epoch [1/3], Step [434/3236], Loss: 3.2023, Perplexity: 24.5880
Epoch [1/3], Step [435/3236], Loss: 2.9037, Perplexity: 18.2414
Epoch [1/3], Step [436/3236], Loss: 2.9510, Perplexity: 19.1247
Epoch [1/3], Step [437/3236], Loss: 2.9488, Perplexity: 19.0833
Epoch [1/3], Step [438/3236], Loss: 3.0372, Perplexity: 20.8474
Epoch [1/3], Step [439/3236], Loss: 3.0674, Perplexity: 21.4861
Epoch [1/3], Step [440/3236], Loss: 2.9959, Perplexity: 20.0035
Epoch [1/3], Step [441/3236], Loss: 2.9045, Perplexity: 18.2568
Epoch [1/3], Step [442/3236], Loss: 3.0178, Perplexity: 20.4467
Epoch [1/3], Step [443/3236], Loss: 3.0337, Perplexity: 20.7735
Epoch [1/3], Step [444/3236], Loss: 3.0114, Perplexity: 20.3167
Epoch [1/3], Step [445/3236], Loss: 3.1747, Perplexity: 23.9200
Epoch [1/3], Step [446/3236], Loss: 3.1004, Perplexity: 22.2067
Epoch [1/3], Step [447/3236], Loss: 3.0531, Perplexity: 21.1804
Epoch [1/3], Step [448/3236], Loss: 2.9120, Perplexity: 18.3927
Epoch [1/3], Step [449/3236], Loss: 3.1498, Perplexity: 23.3305
Epoch [1/3], Step [450/3236], Loss: 3.0114, Perplexity: 20.3154
Epoch [1/3], Step [451/3236], Loss: 2.8467, Perplexity: 17.2300
Epoch [1/3], Step [452/3236], Loss: 3.1262, Perplexity: 22.7862
Epoch [1/3], Step [453/3236], Loss: 2.9590, Perplexity: 19.2784
Epoch [1/3], Step [454/3236], Loss: 3.1081, Perplexity: 22.3793
Epoch [1/3], Step [455/3236], Loss: 2.8719, Perplexity: 17.6699
Epoch [1/3], Step [456/3236], Loss: 3.1344, Perplexity: 22.9750
Epoch [1/3], Step [457/3236], Loss: 2.9571, Perplexity: 19.2415
Epoch [1/3], Step [458/3236], Loss: 2.9159, Perplexity: 18.4645
Epoch [1/3], Step [459/3236], Loss: 3.0293, Perplexity: 20.6828
Epoch [1/3], Step [460/3236], Loss: 3.0851, Perplexity: 21.8702
Epoch [1/3], Step [461/3236], Loss: 2.9642, Perplexity: 19.3802
Epoch [1/3], Step [462/3236], Loss: 2.9775, Perplexity: 19.6391
Epoch [1/3], Step [463/3236], Loss: 3.2369, Perplexity: 25.4553
Epoch [1/3], Step [464/3236], Loss: 3.1300, Perplexity: 22.8750
Epoch [1/3], Step [465/3236], Loss: 2.8322, Perplexity: 16.9829
Epoch [1/3], Step [466/3236], Loss: 3.0060, Perplexity: 20.2071
Epoch [1/3], Step [467/3236], Loss: 3.3468, Perplexity: 28.4115
Epoch [1/3], Step [468/3236], Loss: 2.9134, Perplexity: 18.4193
Epoch [1/3], Step [469/3236], Loss: 2.8210, Perplexity: 16.7938
Epoch [1/3], Step [470/3236], Loss: 2.9268, Perplexity: 18.6681
Epoch [1/3], Step [471/3236], Loss: 3.2561, Perplexity: 25.9480
Epoch [1/3], Step [472/3236], Loss: 3.0444, Perplexity: 20.9972
Epoch [1/3], Step [473/3236], Loss: 2.8187, Perplexity: 16.7557
Epoch [1/3], Step [474/3236], Loss: 2.9110, Perplexity: 18.3760
Epoch [1/3], Step [475/3236], Loss: 2.9818, Perplexity: 19.7228
Epoch [1/3], Step [476/3236], Loss: 3.5475, Perplexity: 34.7267
Epoch [1/3], Step [477/3236], Loss: 2.8662, Perplexity: 17.5700
Epoch [1/3], Step [478/3236], Loss: 2.9547, Perplexity: 19.1964
Epoch [1/3], Step [479/3236], Loss: 3.0381, Perplexity: 20.8648
Epoch [1/3], Step [480/3236], Loss: 3.1387, Perplexity: 23.0743
Epoch [1/3], Step [481/3236], Loss: 2.7677, Perplexity: 15.9220
Epoch [1/3], Step [482/3236], Loss: 2.8689, Perplexity: 17.6170
Epoch [1/3], Step [483/3236], Loss: 3.0468, Perplexity: 21.0482
Epoch [1/3], Step [484/3236], Loss: 3.4327, Perplexity: 30.9605
Epoch [1/3], Step [485/3236], Loss: 3.0131, Perplexity: 20.3506
Epoch [1/3], Step [486/3236], Loss: 2.8168, Perplexity: 16.7229
Epoch [1/3], Step [487/3236], Loss: 2.8695, Perplexity: 17.6277
Epoch [1/3], Step [488/3236], Loss: 2.9272, Perplexity: 18.6752
Epoch [1/3], Step [489/3236], Loss: 3.4536, Perplexity: 31.6146
Epoch [1/3], Step [490/3236], Loss: 2.9771, Perplexity: 19.6309
Epoch [1/3], Step [491/3236], Loss: 2.8003, Perplexity: 16.4499
Epoch [1/3], Step [492/3236], Loss: 2.8922, Perplexity: 18.0322
Epoch [1/3], Step [493/3236], Loss: 3.0223, Perplexity: 20.5389
Epoch [1/3], Step [494/3236], Loss: 2.8638, Perplexity: 17.5278
Epoch [1/3], Step [495/3236], Loss: 2.8123, Perplexity: 16.6479
Epoch [1/3], Step [496/3236], Loss: 2.9271, Perplexity: 18.6737
Epoch [1/3], Step [497/3236], Loss: 2.8519, Perplexity: 17.3211
Epoch [1/3], Step [498/3236], Loss: 2.8306, Perplexity: 16.9551
Epoch [1/3], Step [499/3236], Loss: 2.9780, Perplexity: 19.6486
Epoch [1/3], Step [500/3236], Loss: 2.8287, Perplexity: 16.9232
Epoch [1/3], Step [501/3236], Loss: 2.9304, Perplexity: 18.7360
Epoch [1/3], Step [502/3236], Loss: 2.8425, Perplexity: 17.1590
Epoch [1/3], Step [503/3236], Loss: 2.8846, Perplexity: 17.8964
Epoch [1/3], Step [504/3236], Loss: 3.3105, Perplexity: 27.4002
Epoch [1/3], Step [505/3236], Loss: 2.9525, Perplexity: 19.1541
Epoch [1/3], Step [506/3236], Loss: 2.8147, Perplexity: 16.6880
Epoch [1/3], Step [507/3236], Loss: 3.0603, Perplexity: 21.3344
Epoch [1/3], Step [508/3236], Loss: 2.7242, Perplexity: 15.2443
Epoch [1/3], Step [509/3236], Loss: 2.7960, Perplexity: 16.3784
Epoch [1/3], Step [510/3236], Loss: 2.9905, Perplexity: 19.8952
Epoch [1/3], Step [511/3236], Loss: 2.8507, Perplexity: 17.3002
Epoch [1/3], Step [512/3236], Loss: 3.3192, Perplexity: 27.6387
Epoch [1/3], Step [513/3236], Loss: 2.7612, Perplexity: 15.8192
Epoch [1/3], Step [514/3236], Loss: 3.2937, Perplexity: 26.9432
Epoch [1/3], Step [515/3236], Loss: 2.8555, Perplexity: 17.3827
Epoch [1/3], Step [516/3236], Loss: 2.8817, Perplexity: 17.8448
Epoch [1/3], Step [517/3236], Loss: 3.3113, Perplexity: 27.4216
Epoch [1/3], Step [518/3236], Loss: 4.3065, Perplexity: 74.1826
Epoch [1/3], Step [519/3236], Loss: 3.0722, Perplexity: 21.5899
Epoch [1/3], Step [520/3236], Loss: 3.1217, Perplexity: 22.6853
Epoch [1/3], Step [521/3236], Loss: 3.2416, Perplexity: 25.5754
Epoch [1/3], Step [522/3236], Loss: 2.9476, Perplexity: 19.0608
Epoch [1/3], Step [523/3236], Loss: 3.0324, Perplexity: 20.7469
Epoch [1/3], Step [524/3236], Loss: 2.9748, Perplexity: 19.5853
Epoch [1/3], Step [525/3236], Loss: 3.5193, Perplexity: 33.7621
Epoch [1/3], Step [526/3236], Loss: 3.0253, Perplexity: 20.5996
Epoch [1/3], Step [527/3236], Loss: 2.9525, Perplexity: 19.1539
Epoch [1/3], Step [528/3236], Loss: 2.9765, Perplexity: 19.6194
Epoch [1/3], Step [529/3236], Loss: 2.7893, Perplexity: 16.2700
Epoch [1/3], Step [530/3236], Loss: 2.9548, Perplexity: 19.1973
Epoch [1/3], Step [531/3236], Loss: 2.9512, Perplexity: 19.1292
Epoch [1/3], Step [532/3236], Loss: 3.0051, Perplexity: 20.1888
Epoch [1/3], Step [533/3236], Loss: 3.1057, Perplexity: 22.3249
Epoch [1/3], Step [534/3236], Loss: 2.9622, Perplexity: 19.3404
Epoch [1/3], Step [535/3236], Loss: 2.8231, Perplexity: 16.8295
Epoch [1/3], Step [536/3236], Loss: 2.7964, Perplexity: 16.3852
Epoch [1/3], Step [537/3236], Loss: 3.1147, Perplexity: 22.5262
Epoch [1/3], Step [538/3236], Loss: 2.9378, Perplexity: 18.8743
Epoch [1/3], Step [539/3236], Loss: 2.9112, Perplexity: 18.3792
Epoch [1/3], Step [540/3236], Loss: 3.2297, Perplexity: 25.2732
Epoch [1/3], Step [541/3236], Loss: 2.9153, Perplexity: 18.4551
Epoch [1/3], Step [542/3236], Loss: 3.0911, Perplexity: 22.0012
Epoch [1/3], Step [543/3236], Loss: 2.8925, Perplexity: 18.0379
Epoch [1/3], Step [544/3236], Loss: 2.9956, Perplexity: 19.9977
Epoch [1/3], Step [545/3236], Loss: 2.8728, Perplexity: 17.6863
Epoch [1/3], Step [546/3236], Loss: 2.8522, Perplexity: 17.3252
Epoch [1/3], Step [547/3236], Loss: 3.0382, Perplexity: 20.8682
Epoch [1/3], Step [548/3236], Loss: 2.9694, Perplexity: 19.4793
Epoch [1/3], Step [549/3236], Loss: 2.9270, Perplexity: 18.6724
Epoch [1/3], Step [550/3236], Loss: 2.9986, Perplexity: 20.0579
Epoch [1/3], Step [551/3236], Loss: 2.8811, Perplexity: 17.8347
Epoch [1/3], Step [552/3236], Loss: 2.8646, Perplexity: 17.5423
Epoch [1/3], Step [553/3236], Loss: 2.7844, Perplexity: 16.1905
Epoch [1/3], Step [554/3236], Loss: 2.8974, Perplexity: 18.1275
Epoch [1/3], Step [555/3236], Loss: 2.8419, Perplexity: 17.1475
Epoch [1/3], Step [556/3236], Loss: 3.2892, Perplexity: 26.8220
Epoch [1/3], Step [557/3236], Loss: 3.6706, Perplexity: 39.2740
Epoch [1/3], Step [558/3236], Loss: 2.8049, Perplexity: 16.5257
Epoch [1/3], Step [559/3236], Loss: 3.1494, Perplexity: 23.3221
Epoch [1/3], Step [560/3236], Loss: 3.0870, Perplexity: 21.9117
Epoch [1/3], Step [561/3236], Loss: 2.8705, Perplexity: 17.6462
Epoch [1/3], Step [562/3236], Loss: 2.7443, Perplexity: 15.5539
Epoch [1/3], Step [563/3236], Loss: 3.2814, Perplexity: 26.6131
Epoch [1/3], Step [564/3236], Loss: 3.1198, Perplexity: 22.6412
Epoch [1/3], Step [565/3236], Loss: 2.7790, Perplexity: 16.1031
Epoch [1/3], Step [566/3236], Loss: 2.8047, Perplexity: 16.5213
Epoch [1/3], Step [567/3236], Loss: 2.8375, Perplexity: 17.0722
Epoch [1/3], Step [568/3236], Loss: 3.0936, Perplexity: 22.0571
Epoch [1/3], Step [569/3236], Loss: 3.0188, Perplexity: 20.4676
Epoch [1/3], Step [570/3236], Loss: 2.8727, Perplexity: 17.6846
Epoch [1/3], Step [571/3236], Loss: 2.9537, Perplexity: 19.1768
Epoch [1/3], Step [572/3236], Loss: 3.0489, Perplexity: 21.0925
Epoch [1/3], Step [573/3236], Loss: 3.3651, Perplexity: 28.9361
Epoch [1/3], Step [574/3236], Loss: 3.0593, Perplexity: 21.3119
Epoch [1/3], Step [575/3236], Loss: 2.7313, Perplexity: 15.3529
Epoch [1/3], Step [576/3236], Loss: 2.7645, Perplexity: 15.8715
Epoch [1/3], Step [577/3236], Loss: 2.9123, Perplexity: 18.3982
Epoch [1/3], Step [578/3236], Loss: 2.8867, Perplexity: 17.9343
Epoch [1/3], Step [579/3236], Loss: 2.9860, Perplexity: 19.8066
Epoch [1/3], Step [580/3236], Loss: 2.8854, Perplexity: 17.9100
Epoch [1/3], Step [581/3236], Loss: 2.8482, Perplexity: 17.2565
Epoch [1/3], Step [582/3236], Loss: 2.7207, Perplexity: 15.1912
Epoch [1/3], Step [583/3236], Loss: 2.7995, Perplexity: 16.4367
Epoch [1/3], Step [584/3236], Loss: 2.7895, Perplexity: 16.2736
Epoch [1/3], Step [585/3236], Loss: 3.0042, Perplexity: 20.1710
Epoch [1/3], Step [586/3236], Loss: 2.7562, Perplexity: 15.7392
Epoch [1/3], Step [587/3236], Loss: 2.7950, Perplexity: 16.3624
Epoch [1/3], Step [588/3236], Loss: 2.8925, Perplexity: 18.0382
Epoch [1/3], Step [589/3236], Loss: 2.8324, Perplexity: 16.9859
Epoch [1/3], Step [590/3236], Loss: 2.9128, Perplexity: 18.4091
Epoch [1/3], Step [591/3236], Loss: 2.7101, Perplexity: 15.0302
Epoch [1/3], Step [592/3236], Loss: 2.7953, Perplexity: 16.3680
Epoch [1/3], Step [593/3236], Loss: 2.8429, Perplexity: 17.1653
Epoch [1/3], Step [594/3236], Loss: 3.3970, Perplexity: 29.8744
Epoch [1/3], Step [595/3236], Loss: 2.7281, Perplexity: 15.3043
Epoch [1/3], Step [596/3236], Loss: 3.3569, Perplexity: 28.7000
Epoch [1/3], Step [597/3236], Loss: 2.9811, Perplexity: 19.7101
Epoch [1/3], Step [598/3236], Loss: 2.9006, Perplexity: 18.1845
Epoch [1/3], Step [599/3236], Loss: 2.9085, Perplexity: 18.3301
Epoch [1/3], Step [600/3236], Loss: 2.7373, Perplexity: 15.4450
Epoch [1/3], Step [601/3236], Loss: 2.6708, Perplexity: 14.4521
Epoch [1/3], Step [602/3236], Loss: 2.8471, Perplexity: 17.2380
Epoch [1/3], Step [603/3236], Loss: 3.0770, Perplexity: 21.6929
Epoch [1/3], Step [604/3236], Loss: 3.2885, Perplexity: 26.8030
Epoch [1/3], Step [605/3236], Loss: 3.5162, Perplexity: 33.6578
Epoch [1/3], Step [606/3236], Loss: 2.8565, Perplexity: 17.4002
Epoch [1/3], Step [607/3236], Loss: 2.9456, Perplexity: 19.0211
Epoch [1/3], Step [608/3236], Loss: 2.7825, Perplexity: 16.1591
Epoch [1/3], Step [609/3236], Loss: 3.0984, Perplexity: 22.1629
Epoch [1/3], Step [610/3236], Loss: 2.9437, Perplexity: 18.9856
Epoch [1/3], Step [611/3236], Loss: 3.2593, Perplexity: 26.0322
Epoch [1/3], Step [612/3236], Loss: 3.8103, Perplexity: 45.1646
Epoch [1/3], Step [613/3236], Loss: 2.9777, Perplexity: 19.6422
Epoch [1/3], Step [614/3236], Loss: 2.8656, Perplexity: 17.5588
Epoch [1/3], Step [615/3236], Loss: 2.9005, Perplexity: 18.1839
Epoch [1/3], Step [616/3236], Loss: 2.8598, Perplexity: 17.4580
Epoch [1/3], Step [617/3236], Loss: 2.8849, Perplexity: 17.9024
Epoch [1/3], Step [618/3236], Loss: 2.9543, Perplexity: 19.1882
Epoch [1/3], Step [619/3236], Loss: 2.9179, Perplexity: 18.5032
Epoch [1/3], Step [620/3236], Loss: 2.9198, Perplexity: 18.5385
Epoch [1/3], Step [621/3236], Loss: 2.7170, Perplexity: 15.1352
Epoch [1/3], Step [622/3236], Loss: 2.7973, Perplexity: 16.4007
Epoch [1/3], Step [623/3236], Loss: 2.6519, Perplexity: 14.1816
Epoch [1/3], Step [624/3236], Loss: 2.8207, Perplexity: 16.7890
Epoch [1/3], Step [625/3236], Loss: 2.8475, Perplexity: 17.2443
Epoch [1/3], Step [626/3236], Loss: 2.8788, Perplexity: 17.7922
Epoch [1/3], Step [627/3236], Loss: 2.6598, Perplexity: 14.2939
Epoch [1/3], Step [628/3236], Loss: 2.8253, Perplexity: 16.8652
Epoch [1/3], Step [629/3236], Loss: 2.8561, Perplexity: 17.3928
Epoch [1/3], Step [630/3236], Loss: 2.7246, Perplexity: 15.2504
Epoch [1/3], Step [631/3236], Loss: 2.8289, Perplexity: 16.9270
Epoch [1/3], Step [632/3236], Loss: 2.8856, Perplexity: 17.9145
Epoch [1/3], Step [633/3236], Loss: 2.9114, Perplexity: 18.3831
Epoch [1/3], Step [634/3236], Loss: 2.7697, Perplexity: 15.9541
Epoch [1/3], Step [635/3236], Loss: 2.9387, Perplexity: 18.8912
Epoch [1/3], Step [636/3236], Loss: 2.9593, Perplexity: 19.2843
Epoch [1/3], Step [637/3236], Loss: 3.2667, Perplexity: 26.2254
Epoch [1/3], Step [638/3236], Loss: 2.7542, Perplexity: 15.7089
Epoch [1/3], Step [639/3236], Loss: 3.2623, Perplexity: 26.1104
Epoch [1/3], Step [640/3236], Loss: 3.2315, Perplexity: 25.3180
Epoch [1/3], Step [641/3236], Loss: 3.1470, Perplexity: 23.2653
Epoch [1/3], Step [642/3236], Loss: 2.9519, Perplexity: 19.1424
Epoch [1/3], Step [643/3236], Loss: 2.7665, Perplexity: 15.9033
Epoch [1/3], Step [644/3236], Loss: 3.0150, Perplexity: 20.3894
Epoch [1/3], Step [645/3236], Loss: 2.8946, Perplexity: 18.0758
Epoch [1/3], Step [646/3236], Loss: 2.7528, Perplexity: 15.6866
Epoch [1/3], Step [647/3236], Loss: 2.7446, Perplexity: 15.5585
Epoch [1/3], Step [648/3236], Loss: 3.3367, Perplexity: 28.1256
Epoch [1/3], Step [649/3236], Loss: 3.0133, Perplexity: 20.3539
Epoch [1/3], Step [650/3236], Loss: 3.1670, Perplexity: 23.7354
Epoch [1/3], Step [651/3236], Loss: 2.8965, Perplexity: 18.1115
Epoch [1/3], Step [652/3236], Loss: 2.8848, Perplexity: 17.8993
Epoch [1/3], Step [653/3236], Loss: 2.8651, Perplexity: 17.5513
Epoch [1/3], Step [654/3236], Loss: 2.9232, Perplexity: 18.6004
Epoch [1/3], Step [655/3236], Loss: 2.8548, Perplexity: 17.3703
Epoch [1/3], Step [656/3236], Loss: 2.6227, Perplexity: 13.7724
Epoch [1/3], Step [657/3236], Loss: 2.9552, Perplexity: 19.2056
Epoch [1/3], Step [658/3236], Loss: 2.8831, Perplexity: 17.8705
Epoch [1/3], Step [659/3236], Loss: 2.7873, Perplexity: 16.2364
Epoch [1/3], Step [660/3236], Loss: 3.1964, Perplexity: 24.4444
Epoch [1/3], Step [661/3236], Loss: 2.6950, Perplexity: 14.8061
Epoch [1/3], Step [662/3236], Loss: 2.8476, Perplexity: 17.2456
Epoch [1/3], Step [663/3236], Loss: 2.6893, Perplexity: 14.7212
Epoch [1/3], Step [664/3236], Loss: 3.3128, Perplexity: 27.4609
Epoch [1/3], Step [665/3236], Loss: 2.9309, Perplexity: 18.7447
Epoch [1/3], Step [666/3236], Loss: 2.6702, Perplexity: 14.4432
Epoch [1/3], Step [667/3236], Loss: 2.8947, Perplexity: 18.0776
Epoch [1/3], Step [668/3236], Loss: 2.7051, Perplexity: 14.9554
Epoch [1/3], Step [669/3236], Loss: 2.9522, Perplexity: 19.1478
Epoch [1/3], Step [670/3236], Loss: 2.8473, Perplexity: 17.2420
Epoch [1/3], Step [671/3236], Loss: 2.7712, Perplexity: 15.9783
Epoch [1/3], Step [672/3236], Loss: 2.7226, Perplexity: 15.2195
Epoch [1/3], Step [673/3236], Loss: 2.7384, Perplexity: 15.4618
Epoch [1/3], Step [674/3236], Loss: 2.7508, Perplexity: 15.6544
Epoch [1/3], Step [675/3236], Loss: 2.8451, Perplexity: 17.2026
Epoch [1/3], Step [676/3236], Loss: 2.9165, Perplexity: 18.4756
Epoch [1/3], Step [677/3236], Loss: 2.8348, Perplexity: 17.0261
Epoch [1/3], Step [678/3236], Loss: 2.7715, Perplexity: 15.9834
Epoch [1/3], Step [679/3236], Loss: 2.9330, Perplexity: 18.7843
Epoch [1/3], Step [680/3236], Loss: 2.8336, Perplexity: 17.0064
Epoch [1/3], Step [681/3236], Loss: 2.8755, Perplexity: 17.7336
Epoch [1/3], Step [682/3236], Loss: 3.2801, Perplexity: 26.5771
Epoch [1/3], Step [683/3236], Loss: 2.6854, Perplexity: 14.6642
Epoch [1/3], Step [684/3236], Loss: 2.6366, Perplexity: 13.9661
Epoch [1/3], Step [685/3236], Loss: 3.0211, Perplexity: 20.5139
Epoch [1/3], Step [686/3236], Loss: 2.8340, Perplexity: 17.0129
Epoch [1/3], Step [687/3236], Loss: 2.9814, Perplexity: 19.7148
Epoch [1/3], Step [688/3236], Loss: 2.7457, Perplexity: 15.5749
Epoch [1/3], Step [689/3236], Loss: 2.6572, Perplexity: 14.2569
Epoch [1/3], Step [690/3236], Loss: 2.7818, Perplexity: 16.1484
Epoch [1/3], Step [691/3236], Loss: 2.6301, Perplexity: 13.8756
Epoch [1/3], Step [692/3236], Loss: 2.9131, Perplexity: 18.4129
Epoch [1/3], Step [693/3236], Loss: 2.7329, Perplexity: 15.3770
Epoch [1/3], Step [694/3236], Loss: 2.8580, Perplexity: 17.4263
Epoch [1/3], Step [695/3236], Loss: 2.7958, Perplexity: 16.3757
Epoch [1/3], Step [696/3236], Loss: 2.8295, Perplexity: 16.9377
Epoch [1/3], Step [697/3236], Loss: 2.7907, Perplexity: 16.2922
Epoch [1/3], Step [698/3236], Loss: 2.7784, Perplexity: 16.0937
Epoch [1/3], Step [699/3236], Loss: 3.2564, Perplexity: 25.9557
Epoch [1/3], Step [700/3236], Loss: 3.0472, Perplexity: 21.0560
Epoch [1/3], Step [701/3236], Loss: 3.2372, Perplexity: 25.4624
Epoch [1/3], Step [702/3236], Loss: 2.7019, Perplexity: 14.9076
Epoch [1/3], Step [703/3236], Loss: 2.8445, Perplexity: 17.1924
Epoch [1/3], Step [704/3236], Loss: 2.8103, Perplexity: 16.6144
Epoch [1/3], Step [705/3236], Loss: 2.8193, Perplexity: 16.7647
Epoch [1/3], Step [706/3236], Loss: 2.8119, Perplexity: 16.6416
Epoch [1/3], Step [707/3236], Loss: 2.7906, Perplexity: 16.2905
Epoch [1/3], Step [708/3236], Loss: 2.8200, Perplexity: 16.7773
Epoch [1/3], Step [709/3236], Loss: 3.0006, Perplexity: 20.0970
Epoch [1/3], Step [710/3236], Loss: 2.7816, Perplexity: 16.1448
Epoch [1/3], Step [711/3236], Loss: 2.7731, Perplexity: 16.0078
Epoch [1/3], Step [712/3236], Loss: 3.1405, Perplexity: 23.1156
Epoch [1/3], Step [713/3236], Loss: 2.8112, Perplexity: 16.6304
Epoch [1/3], Step [714/3236], Loss: 2.8502, Perplexity: 17.2907
Epoch [1/3], Step [715/3236], Loss: 2.8961, Perplexity: 18.1041
Epoch [1/3], Step [716/3236], Loss: 2.7991, Perplexity: 16.4303
Epoch [1/3], Step [717/3236], Loss: 2.8435, Perplexity: 17.1758
Epoch [1/3], Step [718/3236], Loss: 2.6518, Perplexity: 14.1796
Epoch [1/3], Step [719/3236], Loss: 2.8168, Perplexity: 16.7229
Epoch [1/3], Step [720/3236], Loss: 2.7527, Perplexity: 15.6852
Epoch [1/3], Step [721/3236], Loss: 2.7107, Perplexity: 15.0395
Epoch [1/3], Step [722/3236], Loss: 3.2453, Perplexity: 25.6681
Epoch [1/3], Step [723/3236], Loss: 2.6895, Perplexity: 14.7247
Epoch [1/3], Step [724/3236], Loss: 2.6561, Perplexity: 14.2413
Epoch [1/3], Step [725/3236], Loss: 2.7076, Perplexity: 14.9935
Epoch [1/3], Step [726/3236], Loss: 2.9743, Perplexity: 19.5754
Epoch [1/3], Step [727/3236], Loss: 2.8660, Perplexity: 17.5668
Epoch [1/3], Step [728/3236], Loss: 2.7920, Perplexity: 16.3129
Epoch [1/3], Step [729/3236], Loss: 2.7990, Perplexity: 16.4276
Epoch [1/3], Step [730/3236], Loss: 3.2976, Perplexity: 27.0468
Epoch [1/3], Step [731/3236], Loss: 3.0521, Perplexity: 21.1602
Epoch [1/3], Step [732/3236], Loss: 2.7925, Perplexity: 16.3215
Epoch [1/3], Step [733/3236], Loss: 2.8165, Perplexity: 16.7180
Epoch [1/3], Step [734/3236], Loss: 2.7686, Perplexity: 15.9367
Epoch [1/3], Step [735/3236], Loss: 2.7170, Perplexity: 15.1346
Epoch [1/3], Step [736/3236], Loss: 2.8240, Perplexity: 16.8440
Epoch [1/3], Step [737/3236], Loss: 3.0591, Perplexity: 21.3088
Epoch [1/3], Step [738/3236], Loss: 2.7675, Perplexity: 15.9193
Epoch [1/3], Step [739/3236], Loss: 2.8525, Perplexity: 17.3304
Epoch [1/3], Step [740/3236], Loss: 2.8388, Perplexity: 17.0949
Epoch [1/3], Step [741/3236], Loss: 2.9019, Perplexity: 18.2084
Epoch [1/3], Step [742/3236], Loss: 3.1860, Perplexity: 24.1909
Epoch [1/3], Step [743/3236], Loss: 2.7641, Perplexity: 15.8642
Epoch [1/3], Step [744/3236], Loss: 2.7687, Perplexity: 15.9386
Epoch [1/3], Step [745/3236], Loss: 2.8737, Perplexity: 17.7019
Epoch [1/3], Step [746/3236], Loss: 2.5950, Perplexity: 13.3971
Epoch [1/3], Step [747/3236], Loss: 2.7890, Perplexity: 16.2648
Epoch [1/3], Step [748/3236], Loss: 3.5863, Perplexity: 36.0988
Epoch [1/3], Step [749/3236], Loss: 3.1243, Perplexity: 22.7430
Epoch [1/3], Step [750/3236], Loss: 2.6298, Perplexity: 13.8710
Epoch [1/3], Step [751/3236], Loss: 2.8707, Perplexity: 17.6487
Epoch [1/3], Step [752/3236], Loss: 2.7047, Perplexity: 14.9500
Epoch [1/3], Step [753/3236], Loss: 2.8178, Perplexity: 16.7399
Epoch [1/3], Step [754/3236], Loss: 2.8485, Perplexity: 17.2624
Epoch [1/3], Step [755/3236], Loss: 2.7199, Perplexity: 15.1790
Epoch [1/3], Step [756/3236], Loss: 2.8438, Perplexity: 17.1801
Epoch [1/3], Step [757/3236], Loss: 2.6810, Perplexity: 14.6003
Epoch [1/3], Step [758/3236], Loss: 2.9054, Perplexity: 18.2719
Epoch [1/3], Step [759/3236], Loss: 3.2444, Perplexity: 25.6468
Epoch [1/3], Step [760/3236], Loss: 2.6313, Perplexity: 13.8912
Epoch [1/3], Step [761/3236], Loss: 2.8658, Perplexity: 17.5634
Epoch [1/3], Step [762/3236], Loss: 2.8500, Perplexity: 17.2876
Epoch [1/3], Step [763/3236], Loss: 2.7449, Perplexity: 15.5630
Epoch [1/3], Step [764/3236], Loss: 2.8740, Perplexity: 17.7079
Epoch [1/3], Step [765/3236], Loss: 2.7010, Perplexity: 14.8944
Epoch [1/3], Step [766/3236], Loss: 2.6744, Perplexity: 14.5034
Epoch [1/3], Step [767/3236], Loss: 2.8341, Perplexity: 17.0148
Epoch [1/3], Step [768/3236], Loss: 2.6655, Perplexity: 14.3752
Epoch [1/3], Step [769/3236], Loss: 2.6508, Perplexity: 14.1647
Epoch [1/3], Step [770/3236], Loss: 3.0082, Perplexity: 20.2505
Epoch [1/3], Step [771/3236], Loss: 3.5295, Perplexity: 34.1078
Epoch [1/3], Step [772/3236], Loss: 2.8723, Perplexity: 17.6769
Epoch [1/3], Step [773/3236], Loss: 2.6946, Perplexity: 14.7999
Epoch [1/3], Step [774/3236], Loss: 3.1494, Perplexity: 23.3214
Epoch [1/3], Step [775/3236], Loss: 2.8201, Perplexity: 16.7786
Epoch [1/3], Step [776/3236], Loss: 2.8353, Perplexity: 17.0361
Epoch [1/3], Step [777/3236], Loss: 2.9191, Perplexity: 18.5237
Epoch [1/3], Step [778/3236], Loss: 2.8870, Perplexity: 17.9393
Epoch [1/3], Step [779/3236], Loss: 2.8720, Perplexity: 17.6724
Epoch [1/3], Step [780/3236], Loss: 2.7658, Perplexity: 15.8921
Epoch [1/3], Step [781/3236], Loss: 2.7943, Perplexity: 16.3516
Epoch [1/3], Step [782/3236], Loss: 2.7719, Perplexity: 15.9886
Epoch [1/3], Step [783/3236], Loss: 2.7539, Perplexity: 15.7039
Epoch [1/3], Step [784/3236], Loss: 2.9028, Perplexity: 18.2259
Epoch [1/3], Step [785/3236], Loss: 3.5514, Perplexity: 34.8626
Epoch [1/3], Step [786/3236], Loss: 2.9733, Perplexity: 19.5564
Epoch [1/3], Step [787/3236], Loss: 2.6323, Perplexity: 13.9057
Epoch [1/3], Step [788/3236], Loss: 2.9717, Perplexity: 19.5248
Epoch [1/3], Step [789/3236], Loss: 2.7468, Perplexity: 15.5919
Epoch [1/3], Step [790/3236], Loss: 2.7574, Perplexity: 15.7585
Epoch [1/3], Step [791/3236], Loss: 2.7772, Perplexity: 16.0746
Epoch [1/3], Step [792/3236], Loss: 2.7072, Perplexity: 14.9877
Epoch [1/3], Step [793/3236], Loss: 2.6684, Perplexity: 14.4167
Epoch [1/3], Step [794/3236], Loss: 2.9092, Perplexity: 18.3430
Epoch [1/3], Step [795/3236], Loss: 2.8578, Perplexity: 17.4228
Epoch [1/3], Step [796/3236], Loss: 2.7641, Perplexity: 15.8646
Epoch [1/3], Step [797/3236], Loss: 2.8027, Perplexity: 16.4887
Epoch [1/3], Step [798/3236], Loss: 2.6128, Perplexity: 13.6374
Epoch [1/3], Step [799/3236], Loss: 2.7144, Perplexity: 15.0960
Epoch [1/3], Step [800/3236], Loss: 2.7848, Perplexity: 16.1971
Epoch [1/3], Step [801/3236], Loss: 2.8080, Perplexity: 16.5760
Epoch [1/3], Step [802/3236], Loss: 2.6846, Perplexity: 14.6526
Epoch [1/3], Step [803/3236], Loss: 2.5929, Perplexity: 13.3683
Epoch [1/3], Step [804/3236], Loss: 2.8734, Perplexity: 17.6963
Epoch [1/3], Step [805/3236], Loss: 2.6358, Perplexity: 13.9549
Epoch [1/3], Step [806/3236], Loss: 2.8295, Perplexity: 16.9373
Epoch [1/3], Step [807/3236], Loss: 2.7213, Perplexity: 15.1994
Epoch [1/3], Step [808/3236], Loss: 2.7956, Perplexity: 16.3731
Epoch [1/3], Step [809/3236], Loss: 2.6775, Perplexity: 14.5485
Epoch [1/3], Step [810/3236], Loss: 2.6239, Perplexity: 13.7888
Epoch [1/3], Step [811/3236], Loss: 2.7868, Perplexity: 16.2293
Epoch [1/3], Step [812/3236], Loss: 2.8248, Perplexity: 16.8568
Epoch [1/3], Step [813/3236], Loss: 2.6362, Perplexity: 13.9596
Epoch [1/3], Step [814/3236], Loss: 2.7427, Perplexity: 15.5287
Epoch [1/3], Step [815/3236], Loss: 2.7543, Perplexity: 15.7093
Epoch [1/3], Step [816/3236], Loss: 2.8046, Perplexity: 16.5199
Epoch [1/3], Step [817/3236], Loss: 3.1796, Perplexity: 24.0376
Epoch [1/3], Step [818/3236], Loss: 2.7474, Perplexity: 15.6018
Epoch [1/3], Step [819/3236], Loss: 2.5400, Perplexity: 12.6793
Epoch [1/3], Step [820/3236], Loss: 2.6588, Perplexity: 14.2796
Epoch [1/3], Step [821/3236], Loss: 2.6775, Perplexity: 14.5484
Epoch [1/3], Step [822/3236], Loss: 2.8216, Perplexity: 16.8031
Epoch [1/3], Step [823/3236], Loss: 2.6780, Perplexity: 14.5555
Epoch [1/3], Step [824/3236], Loss: 2.8742, Perplexity: 17.7107
Epoch [1/3], Step [825/3236], Loss: 2.9636, Perplexity: 19.3674
Epoch [1/3], Step [826/3236], Loss: 2.6497, Perplexity: 14.1495
Epoch [1/3], Step [827/3236], Loss: 2.6651, Perplexity: 14.3699
Epoch [1/3], Step [828/3236], Loss: 2.7857, Perplexity: 16.2112
Epoch [1/3], Step [829/3236], Loss: 2.9107, Perplexity: 18.3703
Epoch [1/3], Step [830/3236], Loss: 3.2417, Perplexity: 25.5772
Epoch [1/3], Step [831/3236], Loss: 2.7434, Perplexity: 15.5390
Epoch [1/3], Step [832/3236], Loss: 2.6965, Perplexity: 14.8275
Epoch [1/3], Step [833/3236], Loss: 2.7409, Perplexity: 15.5007
Epoch [1/3], Step [834/3236], Loss: 2.8091, Perplexity: 16.5944
Epoch [1/3], Step [835/3236], Loss: 2.8106, Perplexity: 16.6199
Epoch [1/3], Step [836/3236], Loss: 2.6817, Perplexity: 14.6101
Epoch [1/3], Step [837/3236], Loss: 2.8272, Perplexity: 16.8983
Epoch [1/3], Step [838/3236], Loss: 2.6091, Perplexity: 13.5870
Epoch [1/3], Step [839/3236], Loss: 2.6838, Perplexity: 14.6411
Epoch [1/3], Step [840/3236], Loss: 2.7599, Perplexity: 15.7985
Epoch [1/3], Step [841/3236], Loss: 2.8217, Perplexity: 16.8047
Epoch [1/3], Step [842/3236], Loss: 2.6920, Perplexity: 14.7607
Epoch [1/3], Step [843/3236], Loss: 2.8027, Perplexity: 16.4897
Epoch [1/3], Step [844/3236], Loss: 2.6152, Perplexity: 13.6701
Epoch [1/3], Step [845/3236], Loss: 2.6462, Perplexity: 14.1007
Epoch [1/3], Step [846/3236], Loss: 3.0360, Perplexity: 20.8211
Epoch [1/3], Step [847/3236], Loss: 2.6367, Perplexity: 13.9668
Epoch [1/3], Step [848/3236], Loss: 2.7054, Perplexity: 14.9604
Epoch [1/3], Step [849/3236], Loss: 2.6513, Perplexity: 14.1720
Epoch [1/3], Step [850/3236], Loss: 2.7781, Perplexity: 16.0884
Epoch [1/3], Step [851/3236], Loss: 2.9532, Perplexity: 19.1663
Epoch [1/3], Step [852/3236], Loss: 2.7691, Perplexity: 15.9435
Epoch [1/3], Step [853/3236], Loss: 2.6908, Perplexity: 14.7440
Epoch [1/3], Step [854/3236], Loss: 2.8114, Perplexity: 16.6332
Epoch [1/3], Step [855/3236], Loss: 2.7177, Perplexity: 15.1453
Epoch [1/3], Step [856/3236], Loss: 2.8280, Perplexity: 16.9123
Epoch [1/3], Step [857/3236], Loss: 3.0494, Perplexity: 21.1022
Epoch [1/3], Step [858/3236], Loss: 2.8211, Perplexity: 16.7952
Epoch [1/3], Step [859/3236], Loss: 2.8127, Perplexity: 16.6543
Epoch [1/3], Step [860/3236], Loss: 2.6367, Perplexity: 13.9673
Epoch [1/3], Step [861/3236], Loss: 3.0673, Perplexity: 21.4835
Epoch [1/3], Step [862/3236], Loss: 2.8228, Perplexity: 16.8231
Epoch [1/3], Step [863/3236], Loss: 2.7569, Perplexity: 15.7514
Epoch [1/3], Step [864/3236], Loss: 2.9365, Perplexity: 18.8499
Epoch [1/3], Step [865/3236], Loss: 2.9631, Perplexity: 19.3570
Epoch [1/3], Step [866/3236], Loss: 2.9028, Perplexity: 18.2254
Epoch [1/3], Step [867/3236], Loss: 2.6274, Perplexity: 13.8377
Epoch [1/3], Step [868/3236], Loss: 2.7285, Perplexity: 15.3103
Epoch [1/3], Step [869/3236], Loss: 2.8755, Perplexity: 17.7337
Epoch [1/3], Step [870/3236], Loss: 2.8918, Perplexity: 18.0259
Epoch [1/3], Step [871/3236], Loss: 2.5132, Perplexity: 12.3441
Epoch [1/3], Step [872/3236], Loss: 2.7562, Perplexity: 15.7397
Epoch [1/3], Step [873/3236], Loss: 2.8126, Perplexity: 16.6528
Epoch [1/3], Step [874/3236], Loss: 2.6790, Perplexity: 14.5702
Epoch [1/3], Step [875/3236], Loss: 2.8309, Perplexity: 16.9605
Epoch [1/3], Step [876/3236], Loss: 2.6316, Perplexity: 13.8956
Epoch [1/3], Step [877/3236], Loss: 2.6825, Perplexity: 14.6218
Epoch [1/3], Step [878/3236], Loss: 2.4677, Perplexity: 11.7948
Epoch [1/3], Step [879/3236], Loss: 2.6250, Perplexity: 13.8040
Epoch [1/3], Step [880/3236], Loss: 2.9529, Perplexity: 19.1620
Epoch [1/3], Step [881/3236], Loss: 2.7098, Perplexity: 15.0261
Epoch [1/3], Step [882/3236], Loss: 2.4546, Perplexity: 11.6415
Epoch [1/3], Step [883/3236], Loss: 2.7329, Perplexity: 15.3773
Epoch [1/3], Step [884/3236], Loss: 2.6199, Perplexity: 13.7341
Epoch [1/3], Step [885/3236], Loss: 2.7491, Perplexity: 15.6279
Epoch [1/3], Step [886/3236], Loss: 2.7561, Perplexity: 15.7388
Epoch [1/3], Step [887/3236], Loss: 2.6854, Perplexity: 14.6642
Epoch [1/3], Step [888/3236], Loss: 2.5828, Perplexity: 13.2338
Epoch [1/3], Step [889/3236], Loss: 2.5811, Perplexity: 13.2114
Epoch [1/3], Step [890/3236], Loss: 2.6750, Perplexity: 14.5124
Epoch [1/3], Step [891/3236], Loss: 2.5580, Perplexity: 12.9098
Epoch [1/3], Step [892/3236], Loss: 2.6736, Perplexity: 14.4920
Epoch [1/3], Step [893/3236], Loss: 2.7183, Perplexity: 15.1548
Epoch [1/3], Step [894/3236], Loss: 2.7076, Perplexity: 14.9927
Epoch [1/3], Step [895/3236], Loss: 2.7482, Perplexity: 15.6140
Epoch [1/3], Step [896/3236], Loss: 2.7582, Perplexity: 15.7713
Epoch [1/3], Step [897/3236], Loss: 2.6209, Perplexity: 13.7488
Epoch [1/3], Step [898/3236], Loss: 2.7233, Perplexity: 15.2303
Epoch [1/3], Step [899/3236], Loss: 3.4266, Perplexity: 30.7709
Epoch [1/3], Step [900/3236], Loss: 2.5802, Perplexity: 13.1993
Epoch [1/3], Step [901/3236], Loss: 2.6074, Perplexity: 13.5638
Epoch [1/3], Step [902/3236], Loss: 2.7019, Perplexity: 14.9086
Epoch [1/3], Step [903/3236], Loss: 2.4846, Perplexity: 11.9958
Epoch [1/3], Step [904/3236], Loss: 2.7506, Perplexity: 15.6518
Epoch [1/3], Step [905/3236], Loss: 2.7348, Perplexity: 15.4061
Epoch [1/3], Step [906/3236], Loss: 2.5871, Perplexity: 13.2915
Epoch [1/3], Step [907/3236], Loss: 2.7051, Perplexity: 14.9563
Epoch [1/3], Step [908/3236], Loss: 2.5664, Perplexity: 13.0185
Epoch [1/3], Step [909/3236], Loss: 2.4767, Perplexity: 11.9023
Epoch [1/3], Step [910/3236], Loss: 2.5687, Perplexity: 13.0483
Epoch [1/3], Step [911/3236], Loss: 2.9166, Perplexity: 18.4788
Epoch [1/3], Step [912/3236], Loss: 2.5677, Perplexity: 13.0352
Epoch [1/3], Step [913/3236], Loss: 2.7448, Perplexity: 15.5623
Epoch [1/3], Step [914/3236], Loss: 2.4676, Perplexity: 11.7939
Epoch [1/3], Step [915/3236], Loss: 2.6880, Perplexity: 14.7022
Epoch [1/3], Step [916/3236], Loss: 2.5826, Perplexity: 13.2313
Epoch [1/3], Step [917/3236], Loss: 2.4516, Perplexity: 11.6066
Epoch [1/3], Step [918/3236], Loss: 2.8849, Perplexity: 17.9023
Epoch [1/3], Step [919/3236], Loss: 2.5529, Perplexity: 12.8445
Epoch [1/3], Step [920/3236], Loss: 2.4828, Perplexity: 11.9752
Epoch [1/3], Step [921/3236], Loss: 2.5312, Perplexity: 12.5686
Epoch [1/3], Step [922/3236], Loss: 2.5082, Perplexity: 12.2829
Epoch [1/3], Step [923/3236], Loss: 2.7928, Perplexity: 16.3263
Epoch [1/3], Step [924/3236], Loss: 2.5931, Perplexity: 13.3708
Epoch [1/3], Step [925/3236], Loss: 2.7730, Perplexity: 16.0071
Epoch [1/3], Step [926/3236], Loss: 2.7075, Perplexity: 14.9912
Epoch [1/3], Step [927/3236], Loss: 2.5270, Perplexity: 12.5163
Epoch [1/3], Step [928/3236], Loss: 2.5631, Perplexity: 12.9762
Epoch [1/3], Step [929/3236], Loss: 2.5216, Perplexity: 12.4484
Epoch [1/3], Step [930/3236], Loss: 2.7250, Perplexity: 15.2567
Epoch [1/3], Step [931/3236], Loss: 2.6206, Perplexity: 13.7439
Epoch [1/3], Step [932/3236], Loss: 2.6876, Perplexity: 14.6969
Epoch [1/3], Step [933/3236], Loss: 2.4815, Perplexity: 11.9587
Epoch [1/3], Step [934/3236], Loss: 2.7208, Perplexity: 15.1923
Epoch [1/3], Step [935/3236], Loss: 2.7152, Perplexity: 15.1083
Epoch [1/3], Step [936/3236], Loss: 2.4670, Perplexity: 11.7867
Epoch [1/3], Step [937/3236], Loss: 2.6468, Perplexity: 14.1087
Epoch [1/3], Step [938/3236], Loss: 2.6194, Perplexity: 13.7277
Epoch [1/3], Step [939/3236], Loss: 2.5799, Perplexity: 13.1963
Epoch [1/3], Step [940/3236], Loss: 2.7741, Perplexity: 16.0235
Epoch [1/3], Step [941/3236], Loss: 2.6992, Perplexity: 14.8685
Epoch [1/3], Step [942/3236], Loss: 2.8303, Perplexity: 16.9502
Epoch [1/3], Step [943/3236], Loss: 2.9212, Perplexity: 18.5634
Epoch [1/3], Step [944/3236], Loss: 2.5847, Perplexity: 13.2593
Epoch [1/3], Step [945/3236], Loss: 2.5490, Perplexity: 12.7937
Epoch [1/3], Step [946/3236], Loss: 2.5658, Perplexity: 13.0110
Epoch [1/3], Step [947/3236], Loss: 2.7892, Perplexity: 16.2680
Epoch [1/3], Step [948/3236], Loss: 2.4870, Perplexity: 12.0249
Epoch [1/3], Step [949/3236], Loss: 3.3898, Perplexity: 29.6603
Epoch [1/3], Step [950/3236], Loss: 2.7460, Perplexity: 15.5803
Epoch [1/3], Step [951/3236], Loss: 2.5636, Perplexity: 12.9830
Epoch [1/3], Step [952/3236], Loss: 2.6566, Perplexity: 14.2481
Epoch [1/3], Step [953/3236], Loss: 2.5480, Perplexity: 12.7820
Epoch [1/3], Step [954/3236], Loss: 2.6362, Perplexity: 13.9599
Epoch [1/3], Step [955/3236], Loss: 2.8213, Perplexity: 16.7991
Epoch [1/3], Step [956/3236], Loss: 2.7996, Perplexity: 16.4384
Epoch [1/3], Step [957/3236], Loss: 3.1402, Perplexity: 23.1080
Epoch [1/3], Step [958/3236], Loss: 2.6223, Perplexity: 13.7676
Epoch [1/3], Step [959/3236], Loss: 2.6078, Perplexity: 13.5695
Epoch [1/3], Step [960/3236], Loss: 2.6463, Perplexity: 14.1023
Epoch [1/3], Step [961/3236], Loss: 2.6404, Perplexity: 14.0186
Epoch [1/3], Step [962/3236], Loss: 2.5654, Perplexity: 13.0065
Epoch [1/3], Step [963/3236], Loss: 2.6431, Perplexity: 14.0568
Epoch [1/3], Step [964/3236], Loss: 2.5734, Perplexity: 13.1098
Epoch [1/3], Step [965/3236], Loss: 3.0539, Perplexity: 21.1972
Epoch [1/3], Step [966/3236], Loss: 2.6512, Perplexity: 14.1704
Epoch [1/3], Step [967/3236], Loss: 2.5092, Perplexity: 12.2956
Epoch [1/3], Step [968/3236], Loss: 2.8409, Perplexity: 17.1310
Epoch [1/3], Step [969/3236], Loss: 2.5879, Perplexity: 13.3021
Epoch [1/3], Step [970/3236], Loss: 2.6150, Perplexity: 13.6668
Epoch [1/3], Step [971/3236], Loss: 2.6639, Perplexity: 14.3523
Epoch [1/3], Step [972/3236], Loss: 2.5483, Perplexity: 12.7857
Epoch [1/3], Step [973/3236], Loss: 2.6920, Perplexity: 14.7614
Epoch [1/3], Step [974/3236], Loss: 2.4659, Perplexity: 11.7737
Epoch [1/3], Step [975/3236], Loss: 2.6655, Perplexity: 14.3754
Epoch [1/3], Step [976/3236], Loss: 3.0957, Perplexity: 22.1038
Epoch [1/3], Step [977/3236], Loss: 2.5602, Perplexity: 12.9385
Epoch [1/3], Step [978/3236], Loss: 2.5152, Perplexity: 12.3694
Epoch [1/3], Step [979/3236], Loss: 2.8819, Perplexity: 17.8485
Epoch [1/3], Step [980/3236], Loss: 2.8910, Perplexity: 18.0115
Epoch [1/3], Step [981/3236], Loss: 3.0192, Perplexity: 20.4753
Epoch [1/3], Step [982/3236], Loss: 2.7150, Perplexity: 15.1053
Epoch [1/3], Step [983/3236], Loss: 2.5393, Perplexity: 12.6709
Epoch [1/3], Step [984/3236], Loss: 2.5796, Perplexity: 13.1913
Epoch [1/3], Step [985/3236], Loss: 3.1238, Perplexity: 22.7334
Epoch [1/3], Step [986/3236], Loss: 2.6623, Perplexity: 14.3296
Epoch [1/3], Step [987/3236], Loss: 2.4302, Perplexity: 11.3617
Epoch [1/3], Step [988/3236], Loss: 2.6154, Perplexity: 13.6732
Epoch [1/3], Step [989/3236], Loss: 2.7242, Perplexity: 15.2439
Epoch [1/3], Step [990/3236], Loss: 2.5444, Perplexity: 12.7358
Epoch [1/3], Step [991/3236], Loss: 2.6126, Perplexity: 13.6341
Epoch [1/3], Step [992/3236], Loss: 3.0374, Perplexity: 20.8506
Epoch [1/3], Step [993/3236], Loss: 2.6233, Perplexity: 13.7816
Epoch [1/3], Step [994/3236], Loss: 2.6185, Perplexity: 13.7152
Epoch [1/3], Step [995/3236], Loss: 2.4881, Perplexity: 12.0384
Epoch [1/3], Step [996/3236], Loss: 2.7259, Perplexity: 15.2708
Epoch [1/3], Step [997/3236], Loss: 2.5719, Perplexity: 13.0913
Epoch [1/3], Step [998/3236], Loss: 2.6087, Perplexity: 13.5813
Epoch [1/3], Step [999/3236], Loss: 2.8173, Perplexity: 16.7314
Epoch [1/3], Step [1000/3236], Loss: 2.5742, Perplexity: 13.1212