diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.04040126536763131, + "epoch": 0.08080253073526263, "eval_steps": 500, - "global_step": 20000, + "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -14007,6 +14007,14006 @@ "learning_rate": 4.000000000000001e-06, "loss": 26.4226, "step": 20000 + }, + { + "epoch": 0.04042146600031513, + "grad_norm": 307.7025146484375, + "learning_rate": 4.002e-06, + "loss": 32.5957, + "step": 20010 + }, + { + "epoch": 0.040441666632998946, + "grad_norm": 141.05995178222656, + "learning_rate": 4.004e-06, + "loss": 23.7471, + "step": 20020 + }, + { + "epoch": 0.04046186726568276, + "grad_norm": 291.870849609375, + "learning_rate": 4.006e-06, + "loss": 31.9492, + "step": 20030 + }, + { + "epoch": 0.04048206789836658, + "grad_norm": 417.44512939453125, + "learning_rate": 4.008e-06, + "loss": 41.5067, + "step": 20040 + }, + { + "epoch": 0.04050226853105039, + "grad_norm": 232.98028564453125, + "learning_rate": 4.0100000000000006e-06, + "loss": 38.7123, + "step": 20050 + }, + { + "epoch": 0.040522469163734205, + "grad_norm": 147.53305053710938, + "learning_rate": 4.012000000000001e-06, + "loss": 54.9667, + "step": 20060 + }, + { + "epoch": 0.040542669796418025, + "grad_norm": 233.66880798339844, + "learning_rate": 4.014e-06, + "loss": 44.2462, + "step": 20070 + }, + { + "epoch": 0.04056287042910184, + "grad_norm": 373.2441101074219, + "learning_rate": 4.016e-06, + "loss": 29.1594, + "step": 20080 + }, + { + "epoch": 0.04058307106178566, + "grad_norm": 569.9129638671875, + "learning_rate": 4.018e-06, + "loss": 39.5939, + "step": 20090 + }, + { + "epoch": 0.04060327169446947, + "grad_norm": 19.329669952392578, + "learning_rate": 4.0200000000000005e-06, + "loss": 20.1258, + "step": 20100 + }, + { + "epoch": 0.040623472327153284, + "grad_norm": 180.48561096191406, + "learning_rate": 4.022000000000001e-06, + "loss": 29.2828, + "step": 20110 + }, + { + "epoch": 0.040643672959837104, + "grad_norm": 1575.479736328125, + "learning_rate": 4.024e-06, + "loss": 37.484, + "step": 20120 + }, + { + "epoch": 0.04066387359252092, + "grad_norm": 228.2759552001953, + "learning_rate": 4.026e-06, + "loss": 37.5349, + "step": 20130 + }, + { + "epoch": 0.04068407422520473, + "grad_norm": 428.0335998535156, + "learning_rate": 4.028e-06, + "loss": 40.8519, + "step": 20140 + }, + { + "epoch": 0.04070427485788855, + "grad_norm": 82.67201232910156, + "learning_rate": 4.03e-06, + "loss": 34.8081, + "step": 20150 + }, + { + "epoch": 0.040724475490572364, + "grad_norm": 812.8284912109375, + "learning_rate": 4.0320000000000005e-06, + "loss": 52.8438, + "step": 20160 + }, + { + "epoch": 0.040744676123256184, + "grad_norm": 102.69956970214844, + "learning_rate": 4.034e-06, + "loss": 34.6039, + "step": 20170 + }, + { + "epoch": 0.04076487675594, + "grad_norm": 264.369873046875, + "learning_rate": 4.036000000000001e-06, + "loss": 44.1913, + "step": 20180 + }, + { + "epoch": 0.04078507738862381, + "grad_norm": 375.61328125, + "learning_rate": 4.038e-06, + "loss": 37.198, + "step": 20190 + }, + { + "epoch": 0.04080527802130763, + "grad_norm": 197.2830352783203, + "learning_rate": 4.04e-06, + "loss": 36.5702, + "step": 20200 + }, + { + "epoch": 0.04082547865399144, + "grad_norm": 387.1900329589844, + "learning_rate": 4.0420000000000004e-06, + "loss": 34.2572, + "step": 20210 + }, + { + "epoch": 0.040845679286675256, + "grad_norm": 841.2315673828125, + "learning_rate": 4.044e-06, + "loss": 41.9452, + "step": 20220 + }, + { + "epoch": 0.040865879919359076, + "grad_norm": 112.06974792480469, + "learning_rate": 4.046000000000001e-06, + "loss": 16.1985, + "step": 20230 + }, + { + "epoch": 0.04088608055204289, + "grad_norm": 448.9200134277344, + "learning_rate": 4.048e-06, + "loss": 27.3424, + "step": 20240 + }, + { + "epoch": 0.04090628118472671, + "grad_norm": 741.1703491210938, + "learning_rate": 4.05e-06, + "loss": 38.0227, + "step": 20250 + }, + { + "epoch": 0.04092648181741052, + "grad_norm": 327.67950439453125, + "learning_rate": 4.052e-06, + "loss": 48.0524, + "step": 20260 + }, + { + "epoch": 0.040946682450094335, + "grad_norm": 608.6464233398438, + "learning_rate": 4.0540000000000005e-06, + "loss": 28.6052, + "step": 20270 + }, + { + "epoch": 0.040966883082778155, + "grad_norm": 540.9326171875, + "learning_rate": 4.056000000000001e-06, + "loss": 46.8294, + "step": 20280 + }, + { + "epoch": 0.04098708371546197, + "grad_norm": 273.62530517578125, + "learning_rate": 4.058e-06, + "loss": 40.8854, + "step": 20290 + }, + { + "epoch": 0.04100728434814578, + "grad_norm": 326.904052734375, + "learning_rate": 4.060000000000001e-06, + "loss": 29.1135, + "step": 20300 + }, + { + "epoch": 0.0410274849808296, + "grad_norm": 458.88018798828125, + "learning_rate": 4.062e-06, + "loss": 41.2102, + "step": 20310 + }, + { + "epoch": 0.041047685613513414, + "grad_norm": 667.4222412109375, + "learning_rate": 4.064e-06, + "loss": 35.2916, + "step": 20320 + }, + { + "epoch": 0.041067886246197234, + "grad_norm": 315.7451477050781, + "learning_rate": 4.0660000000000005e-06, + "loss": 37.1972, + "step": 20330 + }, + { + "epoch": 0.04108808687888105, + "grad_norm": 428.32080078125, + "learning_rate": 4.068000000000001e-06, + "loss": 40.0218, + "step": 20340 + }, + { + "epoch": 0.04110828751156486, + "grad_norm": 124.36223602294922, + "learning_rate": 4.07e-06, + "loss": 40.3568, + "step": 20350 + }, + { + "epoch": 0.04112848814424868, + "grad_norm": 251.20816040039062, + "learning_rate": 4.072e-06, + "loss": 29.3377, + "step": 20360 + }, + { + "epoch": 0.04114868877693249, + "grad_norm": 212.65249633789062, + "learning_rate": 4.074e-06, + "loss": 45.3342, + "step": 20370 + }, + { + "epoch": 0.041168889409616306, + "grad_norm": 693.0851440429688, + "learning_rate": 4.0760000000000004e-06, + "loss": 58.4595, + "step": 20380 + }, + { + "epoch": 0.041189090042300126, + "grad_norm": 163.91427612304688, + "learning_rate": 4.078000000000001e-06, + "loss": 23.256, + "step": 20390 + }, + { + "epoch": 0.04120929067498394, + "grad_norm": 285.9828186035156, + "learning_rate": 4.08e-06, + "loss": 34.4963, + "step": 20400 + }, + { + "epoch": 0.04122949130766776, + "grad_norm": 500.408935546875, + "learning_rate": 4.082e-06, + "loss": 35.7673, + "step": 20410 + }, + { + "epoch": 0.04124969194035157, + "grad_norm": 388.328125, + "learning_rate": 4.084e-06, + "loss": 25.7379, + "step": 20420 + }, + { + "epoch": 0.041269892573035385, + "grad_norm": 348.4654541015625, + "learning_rate": 4.086e-06, + "loss": 45.9121, + "step": 20430 + }, + { + "epoch": 0.041290093205719205, + "grad_norm": 545.4322509765625, + "learning_rate": 4.0880000000000005e-06, + "loss": 42.0894, + "step": 20440 + }, + { + "epoch": 0.04131029383840302, + "grad_norm": 122.1491470336914, + "learning_rate": 4.09e-06, + "loss": 22.5061, + "step": 20450 + }, + { + "epoch": 0.04133049447108683, + "grad_norm": 311.8683776855469, + "learning_rate": 4.092000000000001e-06, + "loss": 25.4118, + "step": 20460 + }, + { + "epoch": 0.04135069510377065, + "grad_norm": 106.4710464477539, + "learning_rate": 4.094e-06, + "loss": 33.3134, + "step": 20470 + }, + { + "epoch": 0.041370895736454465, + "grad_norm": 696.3756103515625, + "learning_rate": 4.096e-06, + "loss": 41.182, + "step": 20480 + }, + { + "epoch": 0.041391096369138285, + "grad_norm": 259.6597900390625, + "learning_rate": 4.098e-06, + "loss": 50.8422, + "step": 20490 + }, + { + "epoch": 0.0414112970018221, + "grad_norm": 339.96966552734375, + "learning_rate": 4.1e-06, + "loss": 63.607, + "step": 20500 + }, + { + "epoch": 0.04143149763450591, + "grad_norm": 174.30322265625, + "learning_rate": 4.102000000000001e-06, + "loss": 17.6622, + "step": 20510 + }, + { + "epoch": 0.04145169826718973, + "grad_norm": 403.02362060546875, + "learning_rate": 4.104e-06, + "loss": 31.6906, + "step": 20520 + }, + { + "epoch": 0.041471898899873544, + "grad_norm": 305.7235412597656, + "learning_rate": 4.106e-06, + "loss": 33.6001, + "step": 20530 + }, + { + "epoch": 0.04149209953255736, + "grad_norm": 162.15066528320312, + "learning_rate": 4.108e-06, + "loss": 28.2985, + "step": 20540 + }, + { + "epoch": 0.04151230016524118, + "grad_norm": 868.5485229492188, + "learning_rate": 4.1100000000000005e-06, + "loss": 34.2084, + "step": 20550 + }, + { + "epoch": 0.04153250079792499, + "grad_norm": 251.6429443359375, + "learning_rate": 4.112000000000001e-06, + "loss": 48.9287, + "step": 20560 + }, + { + "epoch": 0.04155270143060881, + "grad_norm": 243.0496063232422, + "learning_rate": 4.114e-06, + "loss": 24.6031, + "step": 20570 + }, + { + "epoch": 0.04157290206329262, + "grad_norm": 292.49139404296875, + "learning_rate": 4.116000000000001e-06, + "loss": 22.1322, + "step": 20580 + }, + { + "epoch": 0.041593102695976436, + "grad_norm": 453.16912841796875, + "learning_rate": 4.118e-06, + "loss": 39.9018, + "step": 20590 + }, + { + "epoch": 0.041613303328660256, + "grad_norm": 231.3798065185547, + "learning_rate": 4.12e-06, + "loss": 46.0094, + "step": 20600 + }, + { + "epoch": 0.04163350396134407, + "grad_norm": 300.7149963378906, + "learning_rate": 4.1220000000000005e-06, + "loss": 26.9751, + "step": 20610 + }, + { + "epoch": 0.04165370459402788, + "grad_norm": 321.439453125, + "learning_rate": 4.124e-06, + "loss": 33.3333, + "step": 20620 + }, + { + "epoch": 0.0416739052267117, + "grad_norm": 167.4238739013672, + "learning_rate": 4.126000000000001e-06, + "loss": 30.8674, + "step": 20630 + }, + { + "epoch": 0.041694105859395515, + "grad_norm": 167.2349395751953, + "learning_rate": 4.128e-06, + "loss": 43.9423, + "step": 20640 + }, + { + "epoch": 0.041714306492079335, + "grad_norm": 277.2059020996094, + "learning_rate": 4.13e-06, + "loss": 30.5669, + "step": 20650 + }, + { + "epoch": 0.04173450712476315, + "grad_norm": 130.9712677001953, + "learning_rate": 4.132e-06, + "loss": 19.1888, + "step": 20660 + }, + { + "epoch": 0.04175470775744696, + "grad_norm": 289.81243896484375, + "learning_rate": 4.1340000000000006e-06, + "loss": 22.6288, + "step": 20670 + }, + { + "epoch": 0.04177490839013078, + "grad_norm": 627.8983764648438, + "learning_rate": 4.136000000000001e-06, + "loss": 30.9941, + "step": 20680 + }, + { + "epoch": 0.041795109022814594, + "grad_norm": 271.6875305175781, + "learning_rate": 4.138e-06, + "loss": 29.0029, + "step": 20690 + }, + { + "epoch": 0.04181530965549841, + "grad_norm": 438.3844909667969, + "learning_rate": 4.14e-06, + "loss": 37.4831, + "step": 20700 + }, + { + "epoch": 0.04183551028818223, + "grad_norm": 429.591796875, + "learning_rate": 4.142e-06, + "loss": 26.2826, + "step": 20710 + }, + { + "epoch": 0.04185571092086604, + "grad_norm": 241.58526611328125, + "learning_rate": 4.1440000000000005e-06, + "loss": 29.2798, + "step": 20720 + }, + { + "epoch": 0.04187591155354986, + "grad_norm": 260.7231140136719, + "learning_rate": 4.146000000000001e-06, + "loss": 24.107, + "step": 20730 + }, + { + "epoch": 0.041896112186233674, + "grad_norm": 878.5065307617188, + "learning_rate": 4.148000000000001e-06, + "loss": 60.2322, + "step": 20740 + }, + { + "epoch": 0.04191631281891749, + "grad_norm": 421.5115051269531, + "learning_rate": 4.15e-06, + "loss": 44.0793, + "step": 20750 + }, + { + "epoch": 0.04193651345160131, + "grad_norm": 294.0861511230469, + "learning_rate": 4.152e-06, + "loss": 40.8335, + "step": 20760 + }, + { + "epoch": 0.04195671408428512, + "grad_norm": 168.69158935546875, + "learning_rate": 4.154e-06, + "loss": 35.5561, + "step": 20770 + }, + { + "epoch": 0.04197691471696893, + "grad_norm": 96.60639953613281, + "learning_rate": 4.1560000000000005e-06, + "loss": 28.9656, + "step": 20780 + }, + { + "epoch": 0.04199711534965275, + "grad_norm": 311.1852722167969, + "learning_rate": 4.158000000000001e-06, + "loss": 35.748, + "step": 20790 + }, + { + "epoch": 0.042017315982336566, + "grad_norm": 188.48680114746094, + "learning_rate": 4.16e-06, + "loss": 34.3928, + "step": 20800 + }, + { + "epoch": 0.042037516615020386, + "grad_norm": 283.5986328125, + "learning_rate": 4.162e-06, + "loss": 40.0365, + "step": 20810 + }, + { + "epoch": 0.0420577172477042, + "grad_norm": 244.67515563964844, + "learning_rate": 4.164e-06, + "loss": 21.4508, + "step": 20820 + }, + { + "epoch": 0.04207791788038801, + "grad_norm": 229.06175231933594, + "learning_rate": 4.1660000000000004e-06, + "loss": 44.8772, + "step": 20830 + }, + { + "epoch": 0.04209811851307183, + "grad_norm": 272.0743713378906, + "learning_rate": 4.168000000000001e-06, + "loss": 23.9264, + "step": 20840 + }, + { + "epoch": 0.042118319145755645, + "grad_norm": 368.3594665527344, + "learning_rate": 4.17e-06, + "loss": 38.5595, + "step": 20850 + }, + { + "epoch": 0.04213851977843946, + "grad_norm": 233.93699645996094, + "learning_rate": 4.172000000000001e-06, + "loss": 33.6996, + "step": 20860 + }, + { + "epoch": 0.04215872041112328, + "grad_norm": 296.39434814453125, + "learning_rate": 4.174e-06, + "loss": 26.6863, + "step": 20870 + }, + { + "epoch": 0.04217892104380709, + "grad_norm": 456.282470703125, + "learning_rate": 4.176e-06, + "loss": 49.657, + "step": 20880 + }, + { + "epoch": 0.04219912167649091, + "grad_norm": 402.9183044433594, + "learning_rate": 4.1780000000000005e-06, + "loss": 44.8408, + "step": 20890 + }, + { + "epoch": 0.042219322309174724, + "grad_norm": 324.6549072265625, + "learning_rate": 4.18e-06, + "loss": 27.7556, + "step": 20900 + }, + { + "epoch": 0.04223952294185854, + "grad_norm": 376.4916687011719, + "learning_rate": 4.182000000000001e-06, + "loss": 32.6511, + "step": 20910 + }, + { + "epoch": 0.04225972357454236, + "grad_norm": 205.3048095703125, + "learning_rate": 4.184e-06, + "loss": 30.0404, + "step": 20920 + }, + { + "epoch": 0.04227992420722617, + "grad_norm": 162.9635467529297, + "learning_rate": 4.186e-06, + "loss": 33.686, + "step": 20930 + }, + { + "epoch": 0.04230012483990998, + "grad_norm": 80.64946746826172, + "learning_rate": 4.188e-06, + "loss": 15.7464, + "step": 20940 + }, + { + "epoch": 0.0423203254725938, + "grad_norm": 138.9059600830078, + "learning_rate": 4.1900000000000005e-06, + "loss": 18.966, + "step": 20950 + }, + { + "epoch": 0.042340526105277616, + "grad_norm": 290.88262939453125, + "learning_rate": 4.192000000000001e-06, + "loss": 34.8412, + "step": 20960 + }, + { + "epoch": 0.042360726737961436, + "grad_norm": 630.651123046875, + "learning_rate": 4.194e-06, + "loss": 24.099, + "step": 20970 + }, + { + "epoch": 0.04238092737064525, + "grad_norm": 153.69415283203125, + "learning_rate": 4.196e-06, + "loss": 50.662, + "step": 20980 + }, + { + "epoch": 0.04240112800332906, + "grad_norm": 249.28179931640625, + "learning_rate": 4.198e-06, + "loss": 37.3011, + "step": 20990 + }, + { + "epoch": 0.04242132863601288, + "grad_norm": 586.7985229492188, + "learning_rate": 4.2000000000000004e-06, + "loss": 36.2117, + "step": 21000 + }, + { + "epoch": 0.042441529268696696, + "grad_norm": 380.9582824707031, + "learning_rate": 4.202000000000001e-06, + "loss": 29.2529, + "step": 21010 + }, + { + "epoch": 0.04246172990138051, + "grad_norm": 85.7302474975586, + "learning_rate": 4.204e-06, + "loss": 40.0706, + "step": 21020 + }, + { + "epoch": 0.04248193053406433, + "grad_norm": 385.5627136230469, + "learning_rate": 4.206e-06, + "loss": 35.7786, + "step": 21030 + }, + { + "epoch": 0.04250213116674814, + "grad_norm": 458.6239929199219, + "learning_rate": 4.208e-06, + "loss": 35.8708, + "step": 21040 + }, + { + "epoch": 0.04252233179943196, + "grad_norm": 304.5676574707031, + "learning_rate": 4.21e-06, + "loss": 36.7744, + "step": 21050 + }, + { + "epoch": 0.042542532432115775, + "grad_norm": 145.11141967773438, + "learning_rate": 4.2120000000000005e-06, + "loss": 28.0551, + "step": 21060 + }, + { + "epoch": 0.04256273306479959, + "grad_norm": 402.4499206542969, + "learning_rate": 4.214000000000001e-06, + "loss": 28.9746, + "step": 21070 + }, + { + "epoch": 0.04258293369748341, + "grad_norm": 191.5008087158203, + "learning_rate": 4.216e-06, + "loss": 37.7505, + "step": 21080 + }, + { + "epoch": 0.04260313433016722, + "grad_norm": 68.45462799072266, + "learning_rate": 4.218e-06, + "loss": 30.1864, + "step": 21090 + }, + { + "epoch": 0.042623334962851034, + "grad_norm": 105.98591613769531, + "learning_rate": 4.22e-06, + "loss": 18.1124, + "step": 21100 + }, + { + "epoch": 0.042643535595534854, + "grad_norm": 761.5296020507812, + "learning_rate": 4.222e-06, + "loss": 27.7599, + "step": 21110 + }, + { + "epoch": 0.04266373622821867, + "grad_norm": 469.4954833984375, + "learning_rate": 4.2240000000000006e-06, + "loss": 32.8871, + "step": 21120 + }, + { + "epoch": 0.04268393686090249, + "grad_norm": 398.43231201171875, + "learning_rate": 4.226e-06, + "loss": 21.9109, + "step": 21130 + }, + { + "epoch": 0.0427041374935863, + "grad_norm": 367.3377685546875, + "learning_rate": 4.228000000000001e-06, + "loss": 27.4366, + "step": 21140 + }, + { + "epoch": 0.04272433812627011, + "grad_norm": 390.81591796875, + "learning_rate": 4.23e-06, + "loss": 35.5517, + "step": 21150 + }, + { + "epoch": 0.04274453875895393, + "grad_norm": 303.1268310546875, + "learning_rate": 4.232e-06, + "loss": 32.5997, + "step": 21160 + }, + { + "epoch": 0.042764739391637746, + "grad_norm": 332.6908264160156, + "learning_rate": 4.2340000000000005e-06, + "loss": 31.0615, + "step": 21170 + }, + { + "epoch": 0.04278494002432156, + "grad_norm": 357.15362548828125, + "learning_rate": 4.236e-06, + "loss": 27.4738, + "step": 21180 + }, + { + "epoch": 0.04280514065700538, + "grad_norm": 206.97450256347656, + "learning_rate": 4.238000000000001e-06, + "loss": 20.7976, + "step": 21190 + }, + { + "epoch": 0.04282534128968919, + "grad_norm": 279.89178466796875, + "learning_rate": 4.24e-06, + "loss": 43.8803, + "step": 21200 + }, + { + "epoch": 0.04284554192237301, + "grad_norm": 87.10535430908203, + "learning_rate": 4.242e-06, + "loss": 30.7336, + "step": 21210 + }, + { + "epoch": 0.042865742555056825, + "grad_norm": 318.570556640625, + "learning_rate": 4.244e-06, + "loss": 61.2978, + "step": 21220 + }, + { + "epoch": 0.04288594318774064, + "grad_norm": 500.3152160644531, + "learning_rate": 4.2460000000000005e-06, + "loss": 50.3606, + "step": 21230 + }, + { + "epoch": 0.04290614382042446, + "grad_norm": 409.52978515625, + "learning_rate": 4.248000000000001e-06, + "loss": 40.0934, + "step": 21240 + }, + { + "epoch": 0.04292634445310827, + "grad_norm": 196.73036193847656, + "learning_rate": 4.25e-06, + "loss": 30.2571, + "step": 21250 + }, + { + "epoch": 0.042946545085792084, + "grad_norm": 550.4962158203125, + "learning_rate": 4.252000000000001e-06, + "loss": 30.2084, + "step": 21260 + }, + { + "epoch": 0.042966745718475904, + "grad_norm": 99.06867218017578, + "learning_rate": 4.254e-06, + "loss": 39.2887, + "step": 21270 + }, + { + "epoch": 0.04298694635115972, + "grad_norm": 682.33154296875, + "learning_rate": 4.256e-06, + "loss": 40.1178, + "step": 21280 + }, + { + "epoch": 0.04300714698384353, + "grad_norm": 335.2230224609375, + "learning_rate": 4.2580000000000006e-06, + "loss": 40.0422, + "step": 21290 + }, + { + "epoch": 0.04302734761652735, + "grad_norm": 356.4090576171875, + "learning_rate": 4.26e-06, + "loss": 31.7668, + "step": 21300 + }, + { + "epoch": 0.043047548249211164, + "grad_norm": 387.0377502441406, + "learning_rate": 4.262000000000001e-06, + "loss": 36.7466, + "step": 21310 + }, + { + "epoch": 0.043067748881894984, + "grad_norm": 118.68424224853516, + "learning_rate": 4.264e-06, + "loss": 39.0243, + "step": 21320 + }, + { + "epoch": 0.0430879495145788, + "grad_norm": 229.70225524902344, + "learning_rate": 4.266e-06, + "loss": 26.9583, + "step": 21330 + }, + { + "epoch": 0.04310815014726261, + "grad_norm": 334.6313781738281, + "learning_rate": 4.2680000000000005e-06, + "loss": 37.4017, + "step": 21340 + }, + { + "epoch": 0.04312835077994643, + "grad_norm": 145.9667510986328, + "learning_rate": 4.270000000000001e-06, + "loss": 24.935, + "step": 21350 + }, + { + "epoch": 0.04314855141263024, + "grad_norm": 218.7997283935547, + "learning_rate": 4.272000000000001e-06, + "loss": 27.2796, + "step": 21360 + }, + { + "epoch": 0.043168752045314056, + "grad_norm": 209.17771911621094, + "learning_rate": 4.274e-06, + "loss": 34.5368, + "step": 21370 + }, + { + "epoch": 0.043188952677997876, + "grad_norm": 258.9840393066406, + "learning_rate": 4.276e-06, + "loss": 29.2008, + "step": 21380 + }, + { + "epoch": 0.04320915331068169, + "grad_norm": 283.368408203125, + "learning_rate": 4.278e-06, + "loss": 52.7408, + "step": 21390 + }, + { + "epoch": 0.04322935394336551, + "grad_norm": 309.7923583984375, + "learning_rate": 4.2800000000000005e-06, + "loss": 27.4384, + "step": 21400 + }, + { + "epoch": 0.04324955457604932, + "grad_norm": 275.92950439453125, + "learning_rate": 4.282000000000001e-06, + "loss": 35.0403, + "step": 21410 + }, + { + "epoch": 0.043269755208733135, + "grad_norm": 69.03273010253906, + "learning_rate": 4.284e-06, + "loss": 45.1349, + "step": 21420 + }, + { + "epoch": 0.043289955841416955, + "grad_norm": 198.91543579101562, + "learning_rate": 4.286e-06, + "loss": 43.4459, + "step": 21430 + }, + { + "epoch": 0.04331015647410077, + "grad_norm": 220.49310302734375, + "learning_rate": 4.288e-06, + "loss": 28.2382, + "step": 21440 + }, + { + "epoch": 0.04333035710678458, + "grad_norm": 447.1377868652344, + "learning_rate": 4.2900000000000004e-06, + "loss": 28.5718, + "step": 21450 + }, + { + "epoch": 0.0433505577394684, + "grad_norm": 493.10675048828125, + "learning_rate": 4.292000000000001e-06, + "loss": 43.5559, + "step": 21460 + }, + { + "epoch": 0.043370758372152214, + "grad_norm": 463.8993225097656, + "learning_rate": 4.294000000000001e-06, + "loss": 30.0089, + "step": 21470 + }, + { + "epoch": 0.043390959004836034, + "grad_norm": 132.20335388183594, + "learning_rate": 4.296e-06, + "loss": 36.3238, + "step": 21480 + }, + { + "epoch": 0.04341115963751985, + "grad_norm": 255.9840850830078, + "learning_rate": 4.298e-06, + "loss": 49.801, + "step": 21490 + }, + { + "epoch": 0.04343136027020366, + "grad_norm": 36.15985870361328, + "learning_rate": 4.3e-06, + "loss": 43.9862, + "step": 21500 + }, + { + "epoch": 0.04345156090288748, + "grad_norm": 467.6071472167969, + "learning_rate": 4.3020000000000005e-06, + "loss": 24.1195, + "step": 21510 + }, + { + "epoch": 0.04347176153557129, + "grad_norm": 1086.1910400390625, + "learning_rate": 4.304000000000001e-06, + "loss": 81.2888, + "step": 21520 + }, + { + "epoch": 0.043491962168255106, + "grad_norm": 307.3807067871094, + "learning_rate": 4.306e-06, + "loss": 30.767, + "step": 21530 + }, + { + "epoch": 0.043512162800938926, + "grad_norm": 321.68975830078125, + "learning_rate": 4.308000000000001e-06, + "loss": 42.3517, + "step": 21540 + }, + { + "epoch": 0.04353236343362274, + "grad_norm": 282.8983459472656, + "learning_rate": 4.31e-06, + "loss": 30.696, + "step": 21550 + }, + { + "epoch": 0.04355256406630656, + "grad_norm": 210.27423095703125, + "learning_rate": 4.312e-06, + "loss": 18.0205, + "step": 21560 + }, + { + "epoch": 0.04357276469899037, + "grad_norm": 36.78266906738281, + "learning_rate": 4.3140000000000005e-06, + "loss": 32.5776, + "step": 21570 + }, + { + "epoch": 0.043592965331674186, + "grad_norm": 416.0316162109375, + "learning_rate": 4.316e-06, + "loss": 42.8172, + "step": 21580 + }, + { + "epoch": 0.043613165964358006, + "grad_norm": 762.2164916992188, + "learning_rate": 4.318000000000001e-06, + "loss": 33.9217, + "step": 21590 + }, + { + "epoch": 0.04363336659704182, + "grad_norm": 359.6988525390625, + "learning_rate": 4.32e-06, + "loss": 32.7206, + "step": 21600 + }, + { + "epoch": 0.04365356722972563, + "grad_norm": 395.9289855957031, + "learning_rate": 4.322e-06, + "loss": 35.3105, + "step": 21610 + }, + { + "epoch": 0.04367376786240945, + "grad_norm": 169.65855407714844, + "learning_rate": 4.3240000000000004e-06, + "loss": 53.9002, + "step": 21620 + }, + { + "epoch": 0.043693968495093265, + "grad_norm": 235.93019104003906, + "learning_rate": 4.326000000000001e-06, + "loss": 44.7782, + "step": 21630 + }, + { + "epoch": 0.043714169127777085, + "grad_norm": 285.8537902832031, + "learning_rate": 4.328000000000001e-06, + "loss": 34.2965, + "step": 21640 + }, + { + "epoch": 0.0437343697604609, + "grad_norm": 101.22665405273438, + "learning_rate": 4.33e-06, + "loss": 40.3858, + "step": 21650 + }, + { + "epoch": 0.04375457039314471, + "grad_norm": 259.66943359375, + "learning_rate": 4.332e-06, + "loss": 42.1844, + "step": 21660 + }, + { + "epoch": 0.04377477102582853, + "grad_norm": 0.0, + "learning_rate": 4.334e-06, + "loss": 22.2301, + "step": 21670 + }, + { + "epoch": 0.043794971658512344, + "grad_norm": 1392.5623779296875, + "learning_rate": 4.3360000000000005e-06, + "loss": 44.3904, + "step": 21680 + }, + { + "epoch": 0.04381517229119616, + "grad_norm": 336.18157958984375, + "learning_rate": 4.338000000000001e-06, + "loss": 39.7264, + "step": 21690 + }, + { + "epoch": 0.04383537292387998, + "grad_norm": 374.5557861328125, + "learning_rate": 4.34e-06, + "loss": 49.7355, + "step": 21700 + }, + { + "epoch": 0.04385557355656379, + "grad_norm": 280.235595703125, + "learning_rate": 4.342e-06, + "loss": 36.2126, + "step": 21710 + }, + { + "epoch": 0.04387577418924761, + "grad_norm": 228.18426513671875, + "learning_rate": 4.344e-06, + "loss": 27.9623, + "step": 21720 + }, + { + "epoch": 0.04389597482193142, + "grad_norm": 472.9103698730469, + "learning_rate": 4.346e-06, + "loss": 47.5958, + "step": 21730 + }, + { + "epoch": 0.043916175454615236, + "grad_norm": 307.89508056640625, + "learning_rate": 4.3480000000000006e-06, + "loss": 41.9201, + "step": 21740 + }, + { + "epoch": 0.043936376087299056, + "grad_norm": 601.6486206054688, + "learning_rate": 4.350000000000001e-06, + "loss": 47.6524, + "step": 21750 + }, + { + "epoch": 0.04395657671998287, + "grad_norm": 507.53900146484375, + "learning_rate": 4.352e-06, + "loss": 36.4503, + "step": 21760 + }, + { + "epoch": 0.04397677735266668, + "grad_norm": 192.6024932861328, + "learning_rate": 4.354e-06, + "loss": 36.7982, + "step": 21770 + }, + { + "epoch": 0.0439969779853505, + "grad_norm": 127.53495788574219, + "learning_rate": 4.356e-06, + "loss": 36.497, + "step": 21780 + }, + { + "epoch": 0.044017178618034315, + "grad_norm": 300.4747009277344, + "learning_rate": 4.3580000000000005e-06, + "loss": 32.1423, + "step": 21790 + }, + { + "epoch": 0.044037379250718135, + "grad_norm": 238.71231079101562, + "learning_rate": 4.360000000000001e-06, + "loss": 38.0991, + "step": 21800 + }, + { + "epoch": 0.04405757988340195, + "grad_norm": 84.91683959960938, + "learning_rate": 4.362e-06, + "loss": 22.3992, + "step": 21810 + }, + { + "epoch": 0.04407778051608576, + "grad_norm": 438.54864501953125, + "learning_rate": 4.364e-06, + "loss": 29.2043, + "step": 21820 + }, + { + "epoch": 0.04409798114876958, + "grad_norm": 780.3211059570312, + "learning_rate": 4.366e-06, + "loss": 36.352, + "step": 21830 + }, + { + "epoch": 0.044118181781453394, + "grad_norm": 217.75856018066406, + "learning_rate": 4.368e-06, + "loss": 24.0482, + "step": 21840 + }, + { + "epoch": 0.04413838241413721, + "grad_norm": 439.7067565917969, + "learning_rate": 4.3700000000000005e-06, + "loss": 37.1481, + "step": 21850 + }, + { + "epoch": 0.04415858304682103, + "grad_norm": 309.642578125, + "learning_rate": 4.372e-06, + "loss": 26.6976, + "step": 21860 + }, + { + "epoch": 0.04417878367950484, + "grad_norm": 169.34609985351562, + "learning_rate": 4.374000000000001e-06, + "loss": 25.6863, + "step": 21870 + }, + { + "epoch": 0.04419898431218866, + "grad_norm": 357.8691101074219, + "learning_rate": 4.376e-06, + "loss": 50.9932, + "step": 21880 + }, + { + "epoch": 0.044219184944872474, + "grad_norm": 397.551513671875, + "learning_rate": 4.378e-06, + "loss": 23.5169, + "step": 21890 + }, + { + "epoch": 0.04423938557755629, + "grad_norm": 453.8232727050781, + "learning_rate": 4.38e-06, + "loss": 24.9619, + "step": 21900 + }, + { + "epoch": 0.04425958621024011, + "grad_norm": 287.2008972167969, + "learning_rate": 4.382e-06, + "loss": 18.2778, + "step": 21910 + }, + { + "epoch": 0.04427978684292392, + "grad_norm": 167.90672302246094, + "learning_rate": 4.384000000000001e-06, + "loss": 27.0911, + "step": 21920 + }, + { + "epoch": 0.04429998747560773, + "grad_norm": 222.32225036621094, + "learning_rate": 4.386e-06, + "loss": 23.165, + "step": 21930 + }, + { + "epoch": 0.04432018810829155, + "grad_norm": 32.362789154052734, + "learning_rate": 4.388e-06, + "loss": 20.8344, + "step": 21940 + }, + { + "epoch": 0.044340388740975366, + "grad_norm": 1063.19140625, + "learning_rate": 4.39e-06, + "loss": 51.5889, + "step": 21950 + }, + { + "epoch": 0.044360589373659186, + "grad_norm": 151.3734130859375, + "learning_rate": 4.3920000000000005e-06, + "loss": 16.7795, + "step": 21960 + }, + { + "epoch": 0.044380790006343, + "grad_norm": 587.815673828125, + "learning_rate": 4.394000000000001e-06, + "loss": 67.0913, + "step": 21970 + }, + { + "epoch": 0.04440099063902681, + "grad_norm": 45.843868255615234, + "learning_rate": 4.396e-06, + "loss": 60.2924, + "step": 21980 + }, + { + "epoch": 0.04442119127171063, + "grad_norm": 256.3442687988281, + "learning_rate": 4.398000000000001e-06, + "loss": 43.1731, + "step": 21990 + }, + { + "epoch": 0.044441391904394445, + "grad_norm": 69.2979965209961, + "learning_rate": 4.4e-06, + "loss": 22.1585, + "step": 22000 + }, + { + "epoch": 0.04446159253707826, + "grad_norm": 413.2718200683594, + "learning_rate": 4.402e-06, + "loss": 38.8711, + "step": 22010 + }, + { + "epoch": 0.04448179316976208, + "grad_norm": 1238.522216796875, + "learning_rate": 4.4040000000000005e-06, + "loss": 42.8863, + "step": 22020 + }, + { + "epoch": 0.04450199380244589, + "grad_norm": 415.684326171875, + "learning_rate": 4.406000000000001e-06, + "loss": 41.3031, + "step": 22030 + }, + { + "epoch": 0.04452219443512971, + "grad_norm": 616.5980834960938, + "learning_rate": 4.408000000000001e-06, + "loss": 28.795, + "step": 22040 + }, + { + "epoch": 0.044542395067813524, + "grad_norm": 567.5768432617188, + "learning_rate": 4.41e-06, + "loss": 43.7818, + "step": 22050 + }, + { + "epoch": 0.04456259570049734, + "grad_norm": 226.39715576171875, + "learning_rate": 4.412e-06, + "loss": 31.4243, + "step": 22060 + }, + { + "epoch": 0.04458279633318116, + "grad_norm": 277.405517578125, + "learning_rate": 4.4140000000000004e-06, + "loss": 43.7672, + "step": 22070 + }, + { + "epoch": 0.04460299696586497, + "grad_norm": 228.32418823242188, + "learning_rate": 4.416000000000001e-06, + "loss": 32.853, + "step": 22080 + }, + { + "epoch": 0.04462319759854878, + "grad_norm": 294.9041442871094, + "learning_rate": 4.418000000000001e-06, + "loss": 34.7952, + "step": 22090 + }, + { + "epoch": 0.0446433982312326, + "grad_norm": 178.13169860839844, + "learning_rate": 4.42e-06, + "loss": 23.8155, + "step": 22100 + }, + { + "epoch": 0.044663598863916416, + "grad_norm": 281.69317626953125, + "learning_rate": 4.422e-06, + "loss": 29.139, + "step": 22110 + }, + { + "epoch": 0.044683799496600236, + "grad_norm": 252.3081817626953, + "learning_rate": 4.424e-06, + "loss": 18.0647, + "step": 22120 + }, + { + "epoch": 0.04470400012928405, + "grad_norm": 67.35968017578125, + "learning_rate": 4.4260000000000005e-06, + "loss": 19.8382, + "step": 22130 + }, + { + "epoch": 0.04472420076196786, + "grad_norm": 135.77525329589844, + "learning_rate": 4.428000000000001e-06, + "loss": 32.8386, + "step": 22140 + }, + { + "epoch": 0.04474440139465168, + "grad_norm": 611.4405517578125, + "learning_rate": 4.430000000000001e-06, + "loss": 34.8868, + "step": 22150 + }, + { + "epoch": 0.044764602027335496, + "grad_norm": 271.38336181640625, + "learning_rate": 4.432e-06, + "loss": 36.5795, + "step": 22160 + }, + { + "epoch": 0.04478480266001931, + "grad_norm": 321.98797607421875, + "learning_rate": 4.434e-06, + "loss": 28.9508, + "step": 22170 + }, + { + "epoch": 0.04480500329270313, + "grad_norm": 532.3291625976562, + "learning_rate": 4.436e-06, + "loss": 25.2873, + "step": 22180 + }, + { + "epoch": 0.04482520392538694, + "grad_norm": 192.2565460205078, + "learning_rate": 4.438e-06, + "loss": 22.3823, + "step": 22190 + }, + { + "epoch": 0.04484540455807076, + "grad_norm": 393.37213134765625, + "learning_rate": 4.440000000000001e-06, + "loss": 67.2099, + "step": 22200 + }, + { + "epoch": 0.044865605190754575, + "grad_norm": 0.0, + "learning_rate": 4.442e-06, + "loss": 41.9024, + "step": 22210 + }, + { + "epoch": 0.04488580582343839, + "grad_norm": 355.0119934082031, + "learning_rate": 4.444e-06, + "loss": 24.5312, + "step": 22220 + }, + { + "epoch": 0.04490600645612221, + "grad_norm": 1475.906982421875, + "learning_rate": 4.446e-06, + "loss": 41.4057, + "step": 22230 + }, + { + "epoch": 0.04492620708880602, + "grad_norm": 277.4278869628906, + "learning_rate": 4.4480000000000004e-06, + "loss": 25.8214, + "step": 22240 + }, + { + "epoch": 0.044946407721489834, + "grad_norm": 106.08101654052734, + "learning_rate": 4.450000000000001e-06, + "loss": 15.1555, + "step": 22250 + }, + { + "epoch": 0.044966608354173654, + "grad_norm": 300.8702697753906, + "learning_rate": 4.452e-06, + "loss": 17.3687, + "step": 22260 + }, + { + "epoch": 0.04498680898685747, + "grad_norm": 495.77703857421875, + "learning_rate": 4.454000000000001e-06, + "loss": 36.3726, + "step": 22270 + }, + { + "epoch": 0.04500700961954129, + "grad_norm": 225.75399780273438, + "learning_rate": 4.456e-06, + "loss": 49.7217, + "step": 22280 + }, + { + "epoch": 0.0450272102522251, + "grad_norm": 921.94677734375, + "learning_rate": 4.458e-06, + "loss": 58.6378, + "step": 22290 + }, + { + "epoch": 0.04504741088490891, + "grad_norm": 230.3359375, + "learning_rate": 4.4600000000000005e-06, + "loss": 28.7533, + "step": 22300 + }, + { + "epoch": 0.04506761151759273, + "grad_norm": 188.73988342285156, + "learning_rate": 4.462e-06, + "loss": 24.0754, + "step": 22310 + }, + { + "epoch": 0.045087812150276546, + "grad_norm": 361.24432373046875, + "learning_rate": 4.464000000000001e-06, + "loss": 47.7305, + "step": 22320 + }, + { + "epoch": 0.04510801278296036, + "grad_norm": 221.80642700195312, + "learning_rate": 4.466e-06, + "loss": 33.0072, + "step": 22330 + }, + { + "epoch": 0.04512821341564418, + "grad_norm": 256.0701599121094, + "learning_rate": 4.468e-06, + "loss": 28.1957, + "step": 22340 + }, + { + "epoch": 0.04514841404832799, + "grad_norm": 254.6149444580078, + "learning_rate": 4.47e-06, + "loss": 27.2295, + "step": 22350 + }, + { + "epoch": 0.04516861468101181, + "grad_norm": 353.35107421875, + "learning_rate": 4.4720000000000006e-06, + "loss": 26.6498, + "step": 22360 + }, + { + "epoch": 0.045188815313695625, + "grad_norm": 288.8414611816406, + "learning_rate": 4.474000000000001e-06, + "loss": 26.8834, + "step": 22370 + }, + { + "epoch": 0.04520901594637944, + "grad_norm": 479.2511291503906, + "learning_rate": 4.476e-06, + "loss": 27.7844, + "step": 22380 + }, + { + "epoch": 0.04522921657906326, + "grad_norm": 160.38461303710938, + "learning_rate": 4.478e-06, + "loss": 50.0185, + "step": 22390 + }, + { + "epoch": 0.04524941721174707, + "grad_norm": 309.38104248046875, + "learning_rate": 4.48e-06, + "loss": 59.0626, + "step": 22400 + }, + { + "epoch": 0.045269617844430884, + "grad_norm": 464.31689453125, + "learning_rate": 4.4820000000000005e-06, + "loss": 34.7741, + "step": 22410 + }, + { + "epoch": 0.045289818477114704, + "grad_norm": 312.4227294921875, + "learning_rate": 4.484000000000001e-06, + "loss": 35.0636, + "step": 22420 + }, + { + "epoch": 0.04531001910979852, + "grad_norm": 598.5370483398438, + "learning_rate": 4.486000000000001e-06, + "loss": 42.6343, + "step": 22430 + }, + { + "epoch": 0.04533021974248234, + "grad_norm": 370.20147705078125, + "learning_rate": 4.488e-06, + "loss": 44.871, + "step": 22440 + }, + { + "epoch": 0.04535042037516615, + "grad_norm": 195.80914306640625, + "learning_rate": 4.49e-06, + "loss": 31.4365, + "step": 22450 + }, + { + "epoch": 0.045370621007849964, + "grad_norm": 362.7513427734375, + "learning_rate": 4.492e-06, + "loss": 28.8031, + "step": 22460 + }, + { + "epoch": 0.045390821640533784, + "grad_norm": 921.4364624023438, + "learning_rate": 4.4940000000000005e-06, + "loss": 39.8159, + "step": 22470 + }, + { + "epoch": 0.0454110222732176, + "grad_norm": 613.3515014648438, + "learning_rate": 4.496000000000001e-06, + "loss": 56.5131, + "step": 22480 + }, + { + "epoch": 0.04543122290590141, + "grad_norm": 354.8691101074219, + "learning_rate": 4.498e-06, + "loss": 32.4083, + "step": 22490 + }, + { + "epoch": 0.04545142353858523, + "grad_norm": 226.96910095214844, + "learning_rate": 4.5e-06, + "loss": 28.8302, + "step": 22500 + }, + { + "epoch": 0.04547162417126904, + "grad_norm": 466.5164489746094, + "learning_rate": 4.502e-06, + "loss": 32.9258, + "step": 22510 + }, + { + "epoch": 0.04549182480395286, + "grad_norm": 357.9403381347656, + "learning_rate": 4.504e-06, + "loss": 44.7259, + "step": 22520 + }, + { + "epoch": 0.045512025436636676, + "grad_norm": 212.605712890625, + "learning_rate": 4.5060000000000006e-06, + "loss": 40.4262, + "step": 22530 + }, + { + "epoch": 0.04553222606932049, + "grad_norm": 564.6653442382812, + "learning_rate": 4.508e-06, + "loss": 40.581, + "step": 22540 + }, + { + "epoch": 0.04555242670200431, + "grad_norm": 306.528564453125, + "learning_rate": 4.510000000000001e-06, + "loss": 25.7713, + "step": 22550 + }, + { + "epoch": 0.04557262733468812, + "grad_norm": 234.44297790527344, + "learning_rate": 4.512e-06, + "loss": 29.1724, + "step": 22560 + }, + { + "epoch": 0.045592827967371935, + "grad_norm": 160.4866485595703, + "learning_rate": 4.514e-06, + "loss": 62.8937, + "step": 22570 + }, + { + "epoch": 0.045613028600055755, + "grad_norm": 1656.3267822265625, + "learning_rate": 4.5160000000000005e-06, + "loss": 41.0898, + "step": 22580 + }, + { + "epoch": 0.04563322923273957, + "grad_norm": 350.4644775390625, + "learning_rate": 4.518e-06, + "loss": 33.7665, + "step": 22590 + }, + { + "epoch": 0.04565342986542339, + "grad_norm": 100.78395080566406, + "learning_rate": 4.520000000000001e-06, + "loss": 17.6624, + "step": 22600 + }, + { + "epoch": 0.0456736304981072, + "grad_norm": 213.51217651367188, + "learning_rate": 4.522e-06, + "loss": 29.5539, + "step": 22610 + }, + { + "epoch": 0.045693831130791014, + "grad_norm": 164.36734008789062, + "learning_rate": 4.524e-06, + "loss": 39.8798, + "step": 22620 + }, + { + "epoch": 0.045714031763474834, + "grad_norm": 450.3773193359375, + "learning_rate": 4.526e-06, + "loss": 39.0242, + "step": 22630 + }, + { + "epoch": 0.04573423239615865, + "grad_norm": 311.81683349609375, + "learning_rate": 4.5280000000000005e-06, + "loss": 35.9892, + "step": 22640 + }, + { + "epoch": 0.04575443302884246, + "grad_norm": 315.1264343261719, + "learning_rate": 4.530000000000001e-06, + "loss": 26.1883, + "step": 22650 + }, + { + "epoch": 0.04577463366152628, + "grad_norm": 1015.3447265625, + "learning_rate": 4.532e-06, + "loss": 19.782, + "step": 22660 + }, + { + "epoch": 0.04579483429421009, + "grad_norm": 146.39332580566406, + "learning_rate": 4.534000000000001e-06, + "loss": 26.6612, + "step": 22670 + }, + { + "epoch": 0.04581503492689391, + "grad_norm": 575.60693359375, + "learning_rate": 4.536e-06, + "loss": 39.4156, + "step": 22680 + }, + { + "epoch": 0.045835235559577726, + "grad_norm": 135.11111450195312, + "learning_rate": 4.5380000000000004e-06, + "loss": 42.7714, + "step": 22690 + }, + { + "epoch": 0.04585543619226154, + "grad_norm": 62.020042419433594, + "learning_rate": 4.540000000000001e-06, + "loss": 41.5205, + "step": 22700 + }, + { + "epoch": 0.04587563682494536, + "grad_norm": 418.8486633300781, + "learning_rate": 4.542e-06, + "loss": 42.9501, + "step": 22710 + }, + { + "epoch": 0.04589583745762917, + "grad_norm": 317.9814147949219, + "learning_rate": 4.544000000000001e-06, + "loss": 50.0577, + "step": 22720 + }, + { + "epoch": 0.045916038090312986, + "grad_norm": 311.05535888671875, + "learning_rate": 4.546e-06, + "loss": 36.0328, + "step": 22730 + }, + { + "epoch": 0.045936238722996806, + "grad_norm": 309.5693664550781, + "learning_rate": 4.548e-06, + "loss": 17.6359, + "step": 22740 + }, + { + "epoch": 0.04595643935568062, + "grad_norm": 269.7965393066406, + "learning_rate": 4.5500000000000005e-06, + "loss": 29.5682, + "step": 22750 + }, + { + "epoch": 0.04597663998836444, + "grad_norm": 201.2880859375, + "learning_rate": 4.552000000000001e-06, + "loss": 37.9817, + "step": 22760 + }, + { + "epoch": 0.04599684062104825, + "grad_norm": 97.00022888183594, + "learning_rate": 4.554000000000001e-06, + "loss": 24.4323, + "step": 22770 + }, + { + "epoch": 0.046017041253732065, + "grad_norm": 216.03265380859375, + "learning_rate": 4.556e-06, + "loss": 31.051, + "step": 22780 + }, + { + "epoch": 0.046037241886415885, + "grad_norm": 357.0312194824219, + "learning_rate": 4.558e-06, + "loss": 24.9925, + "step": 22790 + }, + { + "epoch": 0.0460574425190997, + "grad_norm": 289.9405517578125, + "learning_rate": 4.56e-06, + "loss": 40.7071, + "step": 22800 + }, + { + "epoch": 0.04607764315178351, + "grad_norm": 278.6811828613281, + "learning_rate": 4.5620000000000005e-06, + "loss": 27.6926, + "step": 22810 + }, + { + "epoch": 0.04609784378446733, + "grad_norm": 350.1117248535156, + "learning_rate": 4.564e-06, + "loss": 28.5968, + "step": 22820 + }, + { + "epoch": 0.046118044417151144, + "grad_norm": 224.86300659179688, + "learning_rate": 4.566000000000001e-06, + "loss": 41.4569, + "step": 22830 + }, + { + "epoch": 0.046138245049834964, + "grad_norm": 513.5762939453125, + "learning_rate": 4.568e-06, + "loss": 43.4404, + "step": 22840 + }, + { + "epoch": 0.04615844568251878, + "grad_norm": 153.2064666748047, + "learning_rate": 4.57e-06, + "loss": 28.0236, + "step": 22850 + }, + { + "epoch": 0.04617864631520259, + "grad_norm": 478.1938781738281, + "learning_rate": 4.5720000000000004e-06, + "loss": 38.9502, + "step": 22860 + }, + { + "epoch": 0.04619884694788641, + "grad_norm": 835.1389770507812, + "learning_rate": 4.574e-06, + "loss": 30.5866, + "step": 22870 + }, + { + "epoch": 0.04621904758057022, + "grad_norm": 449.0034484863281, + "learning_rate": 4.576000000000001e-06, + "loss": 38.8385, + "step": 22880 + }, + { + "epoch": 0.046239248213254036, + "grad_norm": 122.23052215576172, + "learning_rate": 4.578e-06, + "loss": 34.8877, + "step": 22890 + }, + { + "epoch": 0.046259448845937856, + "grad_norm": 250.9205322265625, + "learning_rate": 4.58e-06, + "loss": 26.8342, + "step": 22900 + }, + { + "epoch": 0.04627964947862167, + "grad_norm": 373.60589599609375, + "learning_rate": 4.582e-06, + "loss": 42.1963, + "step": 22910 + }, + { + "epoch": 0.04629985011130549, + "grad_norm": 236.33836364746094, + "learning_rate": 4.5840000000000005e-06, + "loss": 36.0522, + "step": 22920 + }, + { + "epoch": 0.0463200507439893, + "grad_norm": 301.6274108886719, + "learning_rate": 4.586000000000001e-06, + "loss": 43.6349, + "step": 22930 + }, + { + "epoch": 0.046340251376673115, + "grad_norm": 432.77850341796875, + "learning_rate": 4.588e-06, + "loss": 22.031, + "step": 22940 + }, + { + "epoch": 0.046360452009356935, + "grad_norm": 330.04632568359375, + "learning_rate": 4.590000000000001e-06, + "loss": 30.1454, + "step": 22950 + }, + { + "epoch": 0.04638065264204075, + "grad_norm": 481.8309631347656, + "learning_rate": 4.592e-06, + "loss": 49.3018, + "step": 22960 + }, + { + "epoch": 0.04640085327472456, + "grad_norm": 182.36968994140625, + "learning_rate": 4.594e-06, + "loss": 29.0452, + "step": 22970 + }, + { + "epoch": 0.04642105390740838, + "grad_norm": 183.73507690429688, + "learning_rate": 4.5960000000000006e-06, + "loss": 30.6057, + "step": 22980 + }, + { + "epoch": 0.046441254540092194, + "grad_norm": 215.35194396972656, + "learning_rate": 4.598e-06, + "loss": 30.7552, + "step": 22990 + }, + { + "epoch": 0.046461455172776014, + "grad_norm": 397.8327331542969, + "learning_rate": 4.600000000000001e-06, + "loss": 38.2998, + "step": 23000 + }, + { + "epoch": 0.04648165580545983, + "grad_norm": 362.9880676269531, + "learning_rate": 4.602e-06, + "loss": 36.6341, + "step": 23010 + }, + { + "epoch": 0.04650185643814364, + "grad_norm": 192.90228271484375, + "learning_rate": 4.604e-06, + "loss": 26.8832, + "step": 23020 + }, + { + "epoch": 0.04652205707082746, + "grad_norm": 115.16897583007812, + "learning_rate": 4.6060000000000005e-06, + "loss": 38.8671, + "step": 23030 + }, + { + "epoch": 0.046542257703511274, + "grad_norm": 132.3080291748047, + "learning_rate": 4.608000000000001e-06, + "loss": 25.9005, + "step": 23040 + }, + { + "epoch": 0.04656245833619509, + "grad_norm": 241.00857543945312, + "learning_rate": 4.610000000000001e-06, + "loss": 35.1976, + "step": 23050 + }, + { + "epoch": 0.04658265896887891, + "grad_norm": 297.29949951171875, + "learning_rate": 4.612e-06, + "loss": 27.1766, + "step": 23060 + }, + { + "epoch": 0.04660285960156272, + "grad_norm": 91.31853485107422, + "learning_rate": 4.614e-06, + "loss": 48.7416, + "step": 23070 + }, + { + "epoch": 0.04662306023424654, + "grad_norm": 254.75955200195312, + "learning_rate": 4.616e-06, + "loss": 25.2411, + "step": 23080 + }, + { + "epoch": 0.04664326086693035, + "grad_norm": 454.9356384277344, + "learning_rate": 4.6180000000000005e-06, + "loss": 26.4849, + "step": 23090 + }, + { + "epoch": 0.046663461499614166, + "grad_norm": 411.7412414550781, + "learning_rate": 4.620000000000001e-06, + "loss": 50.9135, + "step": 23100 + }, + { + "epoch": 0.046683662132297986, + "grad_norm": 363.6336364746094, + "learning_rate": 4.622e-06, + "loss": 63.1865, + "step": 23110 + }, + { + "epoch": 0.0467038627649818, + "grad_norm": 349.7676696777344, + "learning_rate": 4.624e-06, + "loss": 35.0445, + "step": 23120 + }, + { + "epoch": 0.04672406339766561, + "grad_norm": 434.74432373046875, + "learning_rate": 4.626e-06, + "loss": 31.6022, + "step": 23130 + }, + { + "epoch": 0.04674426403034943, + "grad_norm": 325.1117858886719, + "learning_rate": 4.628e-06, + "loss": 32.9686, + "step": 23140 + }, + { + "epoch": 0.046764464663033245, + "grad_norm": 279.353271484375, + "learning_rate": 4.6300000000000006e-06, + "loss": 23.7207, + "step": 23150 + }, + { + "epoch": 0.046784665295717065, + "grad_norm": 203.7786102294922, + "learning_rate": 4.632000000000001e-06, + "loss": 15.9233, + "step": 23160 + }, + { + "epoch": 0.04680486592840088, + "grad_norm": 68.2558364868164, + "learning_rate": 4.634e-06, + "loss": 18.657, + "step": 23170 + }, + { + "epoch": 0.04682506656108469, + "grad_norm": 333.67999267578125, + "learning_rate": 4.636e-06, + "loss": 37.6118, + "step": 23180 + }, + { + "epoch": 0.04684526719376851, + "grad_norm": 172.0549774169922, + "learning_rate": 4.638e-06, + "loss": 28.8333, + "step": 23190 + }, + { + "epoch": 0.046865467826452324, + "grad_norm": 365.0789489746094, + "learning_rate": 4.6400000000000005e-06, + "loss": 25.9707, + "step": 23200 + }, + { + "epoch": 0.04688566845913614, + "grad_norm": 452.52215576171875, + "learning_rate": 4.642000000000001e-06, + "loss": 22.5802, + "step": 23210 + }, + { + "epoch": 0.04690586909181996, + "grad_norm": 237.44772338867188, + "learning_rate": 4.644e-06, + "loss": 13.3965, + "step": 23220 + }, + { + "epoch": 0.04692606972450377, + "grad_norm": 308.40032958984375, + "learning_rate": 4.646000000000001e-06, + "loss": 32.5556, + "step": 23230 + }, + { + "epoch": 0.04694627035718759, + "grad_norm": 513.1425170898438, + "learning_rate": 4.648e-06, + "loss": 29.3123, + "step": 23240 + }, + { + "epoch": 0.0469664709898714, + "grad_norm": 558.6598510742188, + "learning_rate": 4.65e-06, + "loss": 47.8486, + "step": 23250 + }, + { + "epoch": 0.046986671622555216, + "grad_norm": 335.6075439453125, + "learning_rate": 4.6520000000000005e-06, + "loss": 33.3238, + "step": 23260 + }, + { + "epoch": 0.047006872255239036, + "grad_norm": 180.42686462402344, + "learning_rate": 4.654e-06, + "loss": 49.5276, + "step": 23270 + }, + { + "epoch": 0.04702707288792285, + "grad_norm": 223.03897094726562, + "learning_rate": 4.656000000000001e-06, + "loss": 19.6996, + "step": 23280 + }, + { + "epoch": 0.04704727352060666, + "grad_norm": 353.6033630371094, + "learning_rate": 4.658e-06, + "loss": 43.4261, + "step": 23290 + }, + { + "epoch": 0.04706747415329048, + "grad_norm": 408.29296875, + "learning_rate": 4.66e-06, + "loss": 31.6004, + "step": 23300 + }, + { + "epoch": 0.047087674785974296, + "grad_norm": 176.74081420898438, + "learning_rate": 4.6620000000000004e-06, + "loss": 16.1454, + "step": 23310 + }, + { + "epoch": 0.047107875418658116, + "grad_norm": 104.51349639892578, + "learning_rate": 4.664000000000001e-06, + "loss": 24.2505, + "step": 23320 + }, + { + "epoch": 0.04712807605134193, + "grad_norm": 252.4790496826172, + "learning_rate": 4.666000000000001e-06, + "loss": 20.1939, + "step": 23330 + }, + { + "epoch": 0.04714827668402574, + "grad_norm": 180.61465454101562, + "learning_rate": 4.668e-06, + "loss": 22.1896, + "step": 23340 + }, + { + "epoch": 0.04716847731670956, + "grad_norm": 245.4461669921875, + "learning_rate": 4.670000000000001e-06, + "loss": 50.637, + "step": 23350 + }, + { + "epoch": 0.047188677949393375, + "grad_norm": 237.6515350341797, + "learning_rate": 4.672e-06, + "loss": 69.12, + "step": 23360 + }, + { + "epoch": 0.04720887858207719, + "grad_norm": 364.08929443359375, + "learning_rate": 4.6740000000000005e-06, + "loss": 38.5139, + "step": 23370 + }, + { + "epoch": 0.04722907921476101, + "grad_norm": 648.5537109375, + "learning_rate": 4.676000000000001e-06, + "loss": 80.8779, + "step": 23380 + }, + { + "epoch": 0.04724927984744482, + "grad_norm": 801.7887573242188, + "learning_rate": 4.678e-06, + "loss": 35.1651, + "step": 23390 + }, + { + "epoch": 0.04726948048012864, + "grad_norm": 180.57571411132812, + "learning_rate": 4.680000000000001e-06, + "loss": 25.4433, + "step": 23400 + }, + { + "epoch": 0.047289681112812454, + "grad_norm": 1085.69384765625, + "learning_rate": 4.682e-06, + "loss": 29.3402, + "step": 23410 + }, + { + "epoch": 0.04730988174549627, + "grad_norm": 181.84576416015625, + "learning_rate": 4.684e-06, + "loss": 24.5804, + "step": 23420 + }, + { + "epoch": 0.04733008237818009, + "grad_norm": 170.5708465576172, + "learning_rate": 4.6860000000000005e-06, + "loss": 31.8119, + "step": 23430 + }, + { + "epoch": 0.0473502830108639, + "grad_norm": 282.826171875, + "learning_rate": 4.688000000000001e-06, + "loss": 32.2964, + "step": 23440 + }, + { + "epoch": 0.04737048364354771, + "grad_norm": 589.2730102539062, + "learning_rate": 4.69e-06, + "loss": 45.9049, + "step": 23450 + }, + { + "epoch": 0.04739068427623153, + "grad_norm": 184.30691528320312, + "learning_rate": 4.692e-06, + "loss": 22.7185, + "step": 23460 + }, + { + "epoch": 0.047410884908915346, + "grad_norm": 356.1437072753906, + "learning_rate": 4.694e-06, + "loss": 27.7077, + "step": 23470 + }, + { + "epoch": 0.047431085541599166, + "grad_norm": 152.33602905273438, + "learning_rate": 4.6960000000000004e-06, + "loss": 27.6814, + "step": 23480 + }, + { + "epoch": 0.04745128617428298, + "grad_norm": 91.97364807128906, + "learning_rate": 4.698000000000001e-06, + "loss": 32.484, + "step": 23490 + }, + { + "epoch": 0.04747148680696679, + "grad_norm": 266.3104248046875, + "learning_rate": 4.7e-06, + "loss": 47.0541, + "step": 23500 + }, + { + "epoch": 0.04749168743965061, + "grad_norm": 162.3765106201172, + "learning_rate": 4.702e-06, + "loss": 20.9091, + "step": 23510 + }, + { + "epoch": 0.047511888072334425, + "grad_norm": 228.0862579345703, + "learning_rate": 4.704e-06, + "loss": 20.6345, + "step": 23520 + }, + { + "epoch": 0.04753208870501824, + "grad_norm": 298.2235107421875, + "learning_rate": 4.706e-06, + "loss": 29.1333, + "step": 23530 + }, + { + "epoch": 0.04755228933770206, + "grad_norm": 866.2457885742188, + "learning_rate": 4.7080000000000005e-06, + "loss": 48.024, + "step": 23540 + }, + { + "epoch": 0.04757248997038587, + "grad_norm": 128.22824096679688, + "learning_rate": 4.71e-06, + "loss": 12.996, + "step": 23550 + }, + { + "epoch": 0.04759269060306969, + "grad_norm": 345.7879943847656, + "learning_rate": 4.712000000000001e-06, + "loss": 48.0625, + "step": 23560 + }, + { + "epoch": 0.047612891235753504, + "grad_norm": 216.14480590820312, + "learning_rate": 4.714e-06, + "loss": 31.7131, + "step": 23570 + }, + { + "epoch": 0.04763309186843732, + "grad_norm": 337.5866394042969, + "learning_rate": 4.716e-06, + "loss": 33.3357, + "step": 23580 + }, + { + "epoch": 0.04765329250112114, + "grad_norm": 238.96351623535156, + "learning_rate": 4.718e-06, + "loss": 35.2006, + "step": 23590 + }, + { + "epoch": 0.04767349313380495, + "grad_norm": 117.896728515625, + "learning_rate": 4.7200000000000005e-06, + "loss": 32.72, + "step": 23600 + }, + { + "epoch": 0.047693693766488764, + "grad_norm": 472.9936828613281, + "learning_rate": 4.722000000000001e-06, + "loss": 37.0214, + "step": 23610 + }, + { + "epoch": 0.047713894399172584, + "grad_norm": 149.30357360839844, + "learning_rate": 4.724e-06, + "loss": 40.1063, + "step": 23620 + }, + { + "epoch": 0.0477340950318564, + "grad_norm": 516.667236328125, + "learning_rate": 4.726000000000001e-06, + "loss": 38.01, + "step": 23630 + }, + { + "epoch": 0.04775429566454022, + "grad_norm": 237.48007202148438, + "learning_rate": 4.728e-06, + "loss": 28.6039, + "step": 23640 + }, + { + "epoch": 0.04777449629722403, + "grad_norm": 252.01455688476562, + "learning_rate": 4.7300000000000005e-06, + "loss": 23.7702, + "step": 23650 + }, + { + "epoch": 0.04779469692990784, + "grad_norm": 232.40296936035156, + "learning_rate": 4.732000000000001e-06, + "loss": 49.4607, + "step": 23660 + }, + { + "epoch": 0.04781489756259166, + "grad_norm": 474.0699157714844, + "learning_rate": 4.734e-06, + "loss": 45.5415, + "step": 23670 + }, + { + "epoch": 0.047835098195275476, + "grad_norm": 428.4226989746094, + "learning_rate": 4.736000000000001e-06, + "loss": 26.8226, + "step": 23680 + }, + { + "epoch": 0.04785529882795929, + "grad_norm": 391.0641784667969, + "learning_rate": 4.738e-06, + "loss": 23.176, + "step": 23690 + }, + { + "epoch": 0.04787549946064311, + "grad_norm": 254.60574340820312, + "learning_rate": 4.74e-06, + "loss": 35.1211, + "step": 23700 + }, + { + "epoch": 0.04789570009332692, + "grad_norm": 386.9615478515625, + "learning_rate": 4.7420000000000005e-06, + "loss": 26.655, + "step": 23710 + }, + { + "epoch": 0.04791590072601074, + "grad_norm": 423.5086364746094, + "learning_rate": 4.744000000000001e-06, + "loss": 34.1011, + "step": 23720 + }, + { + "epoch": 0.047936101358694555, + "grad_norm": 125.49838256835938, + "learning_rate": 4.746000000000001e-06, + "loss": 35.3341, + "step": 23730 + }, + { + "epoch": 0.04795630199137837, + "grad_norm": 369.2350769042969, + "learning_rate": 4.748e-06, + "loss": 42.2543, + "step": 23740 + }, + { + "epoch": 0.04797650262406219, + "grad_norm": 265.3240051269531, + "learning_rate": 4.75e-06, + "loss": 41.9161, + "step": 23750 + }, + { + "epoch": 0.047996703256746, + "grad_norm": 381.4123840332031, + "learning_rate": 4.752e-06, + "loss": 51.3242, + "step": 23760 + }, + { + "epoch": 0.048016903889429814, + "grad_norm": 1546.1595458984375, + "learning_rate": 4.7540000000000006e-06, + "loss": 31.1774, + "step": 23770 + }, + { + "epoch": 0.048037104522113634, + "grad_norm": 360.0041809082031, + "learning_rate": 4.756000000000001e-06, + "loss": 50.6428, + "step": 23780 + }, + { + "epoch": 0.04805730515479745, + "grad_norm": 253.70651245117188, + "learning_rate": 4.758e-06, + "loss": 31.1466, + "step": 23790 + }, + { + "epoch": 0.04807750578748127, + "grad_norm": 2709.918701171875, + "learning_rate": 4.76e-06, + "loss": 60.5765, + "step": 23800 + }, + { + "epoch": 0.04809770642016508, + "grad_norm": 364.52545166015625, + "learning_rate": 4.762e-06, + "loss": 34.6433, + "step": 23810 + }, + { + "epoch": 0.04811790705284889, + "grad_norm": 303.4349670410156, + "learning_rate": 4.7640000000000005e-06, + "loss": 25.8956, + "step": 23820 + }, + { + "epoch": 0.04813810768553271, + "grad_norm": 298.9841613769531, + "learning_rate": 4.766000000000001e-06, + "loss": 20.4022, + "step": 23830 + }, + { + "epoch": 0.048158308318216526, + "grad_norm": 385.0189208984375, + "learning_rate": 4.768000000000001e-06, + "loss": 32.9447, + "step": 23840 + }, + { + "epoch": 0.04817850895090034, + "grad_norm": 225.636962890625, + "learning_rate": 4.77e-06, + "loss": 31.4838, + "step": 23850 + }, + { + "epoch": 0.04819870958358416, + "grad_norm": 259.67108154296875, + "learning_rate": 4.772e-06, + "loss": 32.3447, + "step": 23860 + }, + { + "epoch": 0.04821891021626797, + "grad_norm": 213.6777801513672, + "learning_rate": 4.774e-06, + "loss": 26.7017, + "step": 23870 + }, + { + "epoch": 0.04823911084895179, + "grad_norm": 221.456787109375, + "learning_rate": 4.7760000000000005e-06, + "loss": 35.3876, + "step": 23880 + }, + { + "epoch": 0.048259311481635606, + "grad_norm": 510.59423828125, + "learning_rate": 4.778000000000001e-06, + "loss": 58.0918, + "step": 23890 + }, + { + "epoch": 0.04827951211431942, + "grad_norm": 586.248291015625, + "learning_rate": 4.78e-06, + "loss": 31.6313, + "step": 23900 + }, + { + "epoch": 0.04829971274700324, + "grad_norm": 421.5391540527344, + "learning_rate": 4.782e-06, + "loss": 30.8656, + "step": 23910 + }, + { + "epoch": 0.04831991337968705, + "grad_norm": 293.57696533203125, + "learning_rate": 4.784e-06, + "loss": 26.2061, + "step": 23920 + }, + { + "epoch": 0.048340114012370865, + "grad_norm": 145.83151245117188, + "learning_rate": 4.7860000000000004e-06, + "loss": 19.0745, + "step": 23930 + }, + { + "epoch": 0.048360314645054685, + "grad_norm": 218.84844970703125, + "learning_rate": 4.7880000000000006e-06, + "loss": 28.3748, + "step": 23940 + }, + { + "epoch": 0.0483805152777385, + "grad_norm": 369.0185852050781, + "learning_rate": 4.79e-06, + "loss": 34.9431, + "step": 23950 + }, + { + "epoch": 0.04840071591042232, + "grad_norm": 209.40855407714844, + "learning_rate": 4.792000000000001e-06, + "loss": 34.9354, + "step": 23960 + }, + { + "epoch": 0.04842091654310613, + "grad_norm": 650.5877075195312, + "learning_rate": 4.794e-06, + "loss": 30.4894, + "step": 23970 + }, + { + "epoch": 0.048441117175789944, + "grad_norm": 599.2838745117188, + "learning_rate": 4.796e-06, + "loss": 61.641, + "step": 23980 + }, + { + "epoch": 0.048461317808473764, + "grad_norm": 191.25987243652344, + "learning_rate": 4.7980000000000005e-06, + "loss": 29.2233, + "step": 23990 + }, + { + "epoch": 0.04848151844115758, + "grad_norm": 117.61515045166016, + "learning_rate": 4.800000000000001e-06, + "loss": 27.267, + "step": 24000 + }, + { + "epoch": 0.04850171907384139, + "grad_norm": 355.64459228515625, + "learning_rate": 4.802000000000001e-06, + "loss": 30.4391, + "step": 24010 + }, + { + "epoch": 0.04852191970652521, + "grad_norm": 333.71051025390625, + "learning_rate": 4.804e-06, + "loss": 23.3836, + "step": 24020 + }, + { + "epoch": 0.04854212033920902, + "grad_norm": 168.31088256835938, + "learning_rate": 4.806000000000001e-06, + "loss": 22.9719, + "step": 24030 + }, + { + "epoch": 0.04856232097189284, + "grad_norm": 372.96038818359375, + "learning_rate": 4.808e-06, + "loss": 19.0593, + "step": 24040 + }, + { + "epoch": 0.048582521604576656, + "grad_norm": 158.93374633789062, + "learning_rate": 4.8100000000000005e-06, + "loss": 35.1784, + "step": 24050 + }, + { + "epoch": 0.04860272223726047, + "grad_norm": 143.72250366210938, + "learning_rate": 4.812000000000001e-06, + "loss": 58.8182, + "step": 24060 + }, + { + "epoch": 0.04862292286994429, + "grad_norm": 248.034912109375, + "learning_rate": 4.814e-06, + "loss": 17.1163, + "step": 24070 + }, + { + "epoch": 0.0486431235026281, + "grad_norm": 908.7403564453125, + "learning_rate": 4.816e-06, + "loss": 32.6592, + "step": 24080 + }, + { + "epoch": 0.048663324135311915, + "grad_norm": 268.7630310058594, + "learning_rate": 4.818e-06, + "loss": 47.9006, + "step": 24090 + }, + { + "epoch": 0.048683524767995735, + "grad_norm": 101.19955444335938, + "learning_rate": 4.8200000000000004e-06, + "loss": 24.2114, + "step": 24100 + }, + { + "epoch": 0.04870372540067955, + "grad_norm": 272.14288330078125, + "learning_rate": 4.822000000000001e-06, + "loss": 41.5454, + "step": 24110 + }, + { + "epoch": 0.04872392603336337, + "grad_norm": 523.4356689453125, + "learning_rate": 4.824000000000001e-06, + "loss": 26.7681, + "step": 24120 + }, + { + "epoch": 0.04874412666604718, + "grad_norm": 266.33056640625, + "learning_rate": 4.826e-06, + "loss": 19.0341, + "step": 24130 + }, + { + "epoch": 0.048764327298730994, + "grad_norm": 389.2867736816406, + "learning_rate": 4.828e-06, + "loss": 43.7141, + "step": 24140 + }, + { + "epoch": 0.048784527931414814, + "grad_norm": 571.64453125, + "learning_rate": 4.83e-06, + "loss": 29.1342, + "step": 24150 + }, + { + "epoch": 0.04880472856409863, + "grad_norm": 329.1141662597656, + "learning_rate": 4.8320000000000005e-06, + "loss": 27.7108, + "step": 24160 + }, + { + "epoch": 0.04882492919678244, + "grad_norm": 293.8599548339844, + "learning_rate": 4.834000000000001e-06, + "loss": 48.921, + "step": 24170 + }, + { + "epoch": 0.04884512982946626, + "grad_norm": 522.3822021484375, + "learning_rate": 4.836e-06, + "loss": 57.5064, + "step": 24180 + }, + { + "epoch": 0.048865330462150074, + "grad_norm": 411.9894714355469, + "learning_rate": 4.838e-06, + "loss": 40.1941, + "step": 24190 + }, + { + "epoch": 0.048885531094833894, + "grad_norm": 247.519775390625, + "learning_rate": 4.84e-06, + "loss": 42.679, + "step": 24200 + }, + { + "epoch": 0.04890573172751771, + "grad_norm": 951.4860229492188, + "learning_rate": 4.842e-06, + "loss": 45.0348, + "step": 24210 + }, + { + "epoch": 0.04892593236020152, + "grad_norm": 0.0, + "learning_rate": 4.8440000000000005e-06, + "loss": 29.2135, + "step": 24220 + }, + { + "epoch": 0.04894613299288534, + "grad_norm": 0.0, + "learning_rate": 4.846e-06, + "loss": 31.1792, + "step": 24230 + }, + { + "epoch": 0.04896633362556915, + "grad_norm": 545.0596923828125, + "learning_rate": 4.848000000000001e-06, + "loss": 36.6954, + "step": 24240 + }, + { + "epoch": 0.048986534258252966, + "grad_norm": 357.08251953125, + "learning_rate": 4.85e-06, + "loss": 31.0875, + "step": 24250 + }, + { + "epoch": 0.049006734890936786, + "grad_norm": 473.1929016113281, + "learning_rate": 4.852e-06, + "loss": 37.1735, + "step": 24260 + }, + { + "epoch": 0.0490269355236206, + "grad_norm": 189.36502075195312, + "learning_rate": 4.8540000000000005e-06, + "loss": 36.2747, + "step": 24270 + }, + { + "epoch": 0.04904713615630442, + "grad_norm": 120.2989730834961, + "learning_rate": 4.856e-06, + "loss": 33.0543, + "step": 24280 + }, + { + "epoch": 0.04906733678898823, + "grad_norm": 72.84486389160156, + "learning_rate": 4.858000000000001e-06, + "loss": 40.5298, + "step": 24290 + }, + { + "epoch": 0.049087537421672045, + "grad_norm": 469.8461608886719, + "learning_rate": 4.86e-06, + "loss": 39.9338, + "step": 24300 + }, + { + "epoch": 0.049107738054355865, + "grad_norm": 264.76458740234375, + "learning_rate": 4.862e-06, + "loss": 40.7187, + "step": 24310 + }, + { + "epoch": 0.04912793868703968, + "grad_norm": 2196.45556640625, + "learning_rate": 4.864e-06, + "loss": 54.8013, + "step": 24320 + }, + { + "epoch": 0.04914813931972349, + "grad_norm": 726.4110717773438, + "learning_rate": 4.8660000000000005e-06, + "loss": 49.6623, + "step": 24330 + }, + { + "epoch": 0.04916833995240731, + "grad_norm": 60.958251953125, + "learning_rate": 4.868000000000001e-06, + "loss": 34.9965, + "step": 24340 + }, + { + "epoch": 0.049188540585091124, + "grad_norm": 238.892822265625, + "learning_rate": 4.87e-06, + "loss": 47.3049, + "step": 24350 + }, + { + "epoch": 0.04920874121777494, + "grad_norm": 666.7787475585938, + "learning_rate": 4.872000000000001e-06, + "loss": 35.4274, + "step": 24360 + }, + { + "epoch": 0.04922894185045876, + "grad_norm": 548.2902221679688, + "learning_rate": 4.874e-06, + "loss": 56.2392, + "step": 24370 + }, + { + "epoch": 0.04924914248314257, + "grad_norm": 411.37261962890625, + "learning_rate": 4.876e-06, + "loss": 29.0048, + "step": 24380 + }, + { + "epoch": 0.04926934311582639, + "grad_norm": 255.85255432128906, + "learning_rate": 4.8780000000000006e-06, + "loss": 39.0574, + "step": 24390 + }, + { + "epoch": 0.0492895437485102, + "grad_norm": 136.60525512695312, + "learning_rate": 4.880000000000001e-06, + "loss": 23.4296, + "step": 24400 + }, + { + "epoch": 0.049309744381194016, + "grad_norm": 280.9986572265625, + "learning_rate": 4.882000000000001e-06, + "loss": 29.5125, + "step": 24410 + }, + { + "epoch": 0.049329945013877836, + "grad_norm": 533.7042236328125, + "learning_rate": 4.884e-06, + "loss": 31.8714, + "step": 24420 + }, + { + "epoch": 0.04935014564656165, + "grad_norm": 179.54776000976562, + "learning_rate": 4.886e-06, + "loss": 18.4666, + "step": 24430 + }, + { + "epoch": 0.04937034627924546, + "grad_norm": 396.60711669921875, + "learning_rate": 4.8880000000000005e-06, + "loss": 25.4745, + "step": 24440 + }, + { + "epoch": 0.04939054691192928, + "grad_norm": 406.3586120605469, + "learning_rate": 4.890000000000001e-06, + "loss": 25.7635, + "step": 24450 + }, + { + "epoch": 0.049410747544613096, + "grad_norm": 2937.0927734375, + "learning_rate": 4.892000000000001e-06, + "loss": 54.7187, + "step": 24460 + }, + { + "epoch": 0.049430948177296916, + "grad_norm": 722.0195922851562, + "learning_rate": 4.894e-06, + "loss": 45.7203, + "step": 24470 + }, + { + "epoch": 0.04945114880998073, + "grad_norm": 281.9906005859375, + "learning_rate": 4.896e-06, + "loss": 32.035, + "step": 24480 + }, + { + "epoch": 0.04947134944266454, + "grad_norm": 221.22129821777344, + "learning_rate": 4.898e-06, + "loss": 59.1987, + "step": 24490 + }, + { + "epoch": 0.04949155007534836, + "grad_norm": 456.32635498046875, + "learning_rate": 4.9000000000000005e-06, + "loss": 31.1111, + "step": 24500 + }, + { + "epoch": 0.049511750708032175, + "grad_norm": 483.26934814453125, + "learning_rate": 4.902000000000001e-06, + "loss": 43.1372, + "step": 24510 + }, + { + "epoch": 0.04953195134071599, + "grad_norm": 48.44151306152344, + "learning_rate": 4.904000000000001e-06, + "loss": 26.2799, + "step": 24520 + }, + { + "epoch": 0.04955215197339981, + "grad_norm": 1512.83984375, + "learning_rate": 4.906e-06, + "loss": 27.6163, + "step": 24530 + }, + { + "epoch": 0.04957235260608362, + "grad_norm": 285.0791931152344, + "learning_rate": 4.908e-06, + "loss": 28.3728, + "step": 24540 + }, + { + "epoch": 0.04959255323876744, + "grad_norm": 787.2332763671875, + "learning_rate": 4.9100000000000004e-06, + "loss": 23.7764, + "step": 24550 + }, + { + "epoch": 0.049612753871451254, + "grad_norm": 205.12551879882812, + "learning_rate": 4.9120000000000006e-06, + "loss": 41.47, + "step": 24560 + }, + { + "epoch": 0.04963295450413507, + "grad_norm": 135.7936553955078, + "learning_rate": 4.914000000000001e-06, + "loss": 23.9787, + "step": 24570 + }, + { + "epoch": 0.04965315513681889, + "grad_norm": 165.5546417236328, + "learning_rate": 4.916e-06, + "loss": 19.4133, + "step": 24580 + }, + { + "epoch": 0.0496733557695027, + "grad_norm": 320.9225769042969, + "learning_rate": 4.918e-06, + "loss": 44.5957, + "step": 24590 + }, + { + "epoch": 0.04969355640218651, + "grad_norm": 201.8945770263672, + "learning_rate": 4.92e-06, + "loss": 26.1929, + "step": 24600 + }, + { + "epoch": 0.04971375703487033, + "grad_norm": 239.5926055908203, + "learning_rate": 4.9220000000000005e-06, + "loss": 24.9607, + "step": 24610 + }, + { + "epoch": 0.049733957667554146, + "grad_norm": 512.6834716796875, + "learning_rate": 4.924000000000001e-06, + "loss": 28.6257, + "step": 24620 + }, + { + "epoch": 0.049754158300237966, + "grad_norm": 264.6862487792969, + "learning_rate": 4.926e-06, + "loss": 18.0654, + "step": 24630 + }, + { + "epoch": 0.04977435893292178, + "grad_norm": 467.5845642089844, + "learning_rate": 4.928000000000001e-06, + "loss": 32.0786, + "step": 24640 + }, + { + "epoch": 0.04979455956560559, + "grad_norm": 315.939453125, + "learning_rate": 4.93e-06, + "loss": 34.9802, + "step": 24650 + }, + { + "epoch": 0.04981476019828941, + "grad_norm": 422.5688171386719, + "learning_rate": 4.932e-06, + "loss": 30.9701, + "step": 24660 + }, + { + "epoch": 0.049834960830973225, + "grad_norm": 670.5335083007812, + "learning_rate": 4.9340000000000005e-06, + "loss": 59.6177, + "step": 24670 + }, + { + "epoch": 0.04985516146365704, + "grad_norm": 159.326904296875, + "learning_rate": 4.936e-06, + "loss": 33.8358, + "step": 24680 + }, + { + "epoch": 0.04987536209634086, + "grad_norm": 517.6242065429688, + "learning_rate": 4.938000000000001e-06, + "loss": 28.0662, + "step": 24690 + }, + { + "epoch": 0.04989556272902467, + "grad_norm": 172.5724334716797, + "learning_rate": 4.94e-06, + "loss": 43.5524, + "step": 24700 + }, + { + "epoch": 0.04991576336170849, + "grad_norm": 31.741701126098633, + "learning_rate": 4.942e-06, + "loss": 19.8362, + "step": 24710 + }, + { + "epoch": 0.049935963994392304, + "grad_norm": 451.263671875, + "learning_rate": 4.9440000000000004e-06, + "loss": 49.2625, + "step": 24720 + }, + { + "epoch": 0.04995616462707612, + "grad_norm": 142.236083984375, + "learning_rate": 4.946000000000001e-06, + "loss": 41.2436, + "step": 24730 + }, + { + "epoch": 0.04997636525975994, + "grad_norm": 292.59075927734375, + "learning_rate": 4.948000000000001e-06, + "loss": 26.0532, + "step": 24740 + }, + { + "epoch": 0.04999656589244375, + "grad_norm": 103.89948272705078, + "learning_rate": 4.95e-06, + "loss": 28.4878, + "step": 24750 + }, + { + "epoch": 0.050016766525127564, + "grad_norm": 245.5137481689453, + "learning_rate": 4.952e-06, + "loss": 47.3051, + "step": 24760 + }, + { + "epoch": 0.050036967157811384, + "grad_norm": 1218.4417724609375, + "learning_rate": 4.954e-06, + "loss": 55.935, + "step": 24770 + }, + { + "epoch": 0.0500571677904952, + "grad_norm": 534.2709350585938, + "learning_rate": 4.9560000000000005e-06, + "loss": 41.0981, + "step": 24780 + }, + { + "epoch": 0.05007736842317902, + "grad_norm": 277.80224609375, + "learning_rate": 4.958000000000001e-06, + "loss": 43.365, + "step": 24790 + }, + { + "epoch": 0.05009756905586283, + "grad_norm": 190.49661254882812, + "learning_rate": 4.960000000000001e-06, + "loss": 23.9928, + "step": 24800 + }, + { + "epoch": 0.05011776968854664, + "grad_norm": 380.3485107421875, + "learning_rate": 4.962e-06, + "loss": 30.1288, + "step": 24810 + }, + { + "epoch": 0.05013797032123046, + "grad_norm": 207.34530639648438, + "learning_rate": 4.964e-06, + "loss": 56.9578, + "step": 24820 + }, + { + "epoch": 0.050158170953914276, + "grad_norm": 716.9381713867188, + "learning_rate": 4.966e-06, + "loss": 36.0246, + "step": 24830 + }, + { + "epoch": 0.05017837158659809, + "grad_norm": 430.68359375, + "learning_rate": 4.9680000000000005e-06, + "loss": 36.4686, + "step": 24840 + }, + { + "epoch": 0.05019857221928191, + "grad_norm": 409.4767150878906, + "learning_rate": 4.970000000000001e-06, + "loss": 50.6373, + "step": 24850 + }, + { + "epoch": 0.05021877285196572, + "grad_norm": 383.60711669921875, + "learning_rate": 4.972e-06, + "loss": 34.2077, + "step": 24860 + }, + { + "epoch": 0.05023897348464954, + "grad_norm": 630.3153686523438, + "learning_rate": 4.974e-06, + "loss": 49.5778, + "step": 24870 + }, + { + "epoch": 0.050259174117333355, + "grad_norm": 836.9533081054688, + "learning_rate": 4.976e-06, + "loss": 44.2279, + "step": 24880 + }, + { + "epoch": 0.05027937475001717, + "grad_norm": 202.4707794189453, + "learning_rate": 4.9780000000000005e-06, + "loss": 36.9321, + "step": 24890 + }, + { + "epoch": 0.05029957538270099, + "grad_norm": 270.40838623046875, + "learning_rate": 4.980000000000001e-06, + "loss": 24.9977, + "step": 24900 + }, + { + "epoch": 0.0503197760153848, + "grad_norm": 199.77508544921875, + "learning_rate": 4.982e-06, + "loss": 21.765, + "step": 24910 + }, + { + "epoch": 0.050339976648068614, + "grad_norm": 278.4015808105469, + "learning_rate": 4.984000000000001e-06, + "loss": 33.6428, + "step": 24920 + }, + { + "epoch": 0.050360177280752434, + "grad_norm": 190.54000854492188, + "learning_rate": 4.986e-06, + "loss": 14.8352, + "step": 24930 + }, + { + "epoch": 0.05038037791343625, + "grad_norm": 28.801469802856445, + "learning_rate": 4.988e-06, + "loss": 13.3349, + "step": 24940 + }, + { + "epoch": 0.05040057854612007, + "grad_norm": 86.93453216552734, + "learning_rate": 4.9900000000000005e-06, + "loss": 35.9467, + "step": 24950 + }, + { + "epoch": 0.05042077917880388, + "grad_norm": 88.48124694824219, + "learning_rate": 4.992e-06, + "loss": 34.9635, + "step": 24960 + }, + { + "epoch": 0.05044097981148769, + "grad_norm": 308.2735595703125, + "learning_rate": 4.994000000000001e-06, + "loss": 28.927, + "step": 24970 + }, + { + "epoch": 0.05046118044417151, + "grad_norm": 332.31890869140625, + "learning_rate": 4.996e-06, + "loss": 40.6573, + "step": 24980 + }, + { + "epoch": 0.050481381076855326, + "grad_norm": 179.0585174560547, + "learning_rate": 4.998e-06, + "loss": 39.3382, + "step": 24990 + }, + { + "epoch": 0.05050158170953914, + "grad_norm": 401.5177307128906, + "learning_rate": 5e-06, + "loss": 18.8979, + "step": 25000 + }, + { + "epoch": 0.05052178234222296, + "grad_norm": 126.90967559814453, + "learning_rate": 5.0020000000000006e-06, + "loss": 24.5952, + "step": 25010 + }, + { + "epoch": 0.05054198297490677, + "grad_norm": 203.88487243652344, + "learning_rate": 5.004e-06, + "loss": 35.7379, + "step": 25020 + }, + { + "epoch": 0.05056218360759059, + "grad_norm": 31.458026885986328, + "learning_rate": 5.006000000000001e-06, + "loss": 25.2942, + "step": 25030 + }, + { + "epoch": 0.050582384240274406, + "grad_norm": 278.30572509765625, + "learning_rate": 5.008000000000001e-06, + "loss": 37.5499, + "step": 25040 + }, + { + "epoch": 0.05060258487295822, + "grad_norm": 207.86512756347656, + "learning_rate": 5.01e-06, + "loss": 26.2798, + "step": 25050 + }, + { + "epoch": 0.05062278550564204, + "grad_norm": 132.76596069335938, + "learning_rate": 5.0120000000000005e-06, + "loss": 25.1971, + "step": 25060 + }, + { + "epoch": 0.05064298613832585, + "grad_norm": 257.3799133300781, + "learning_rate": 5.014e-06, + "loss": 23.6105, + "step": 25070 + }, + { + "epoch": 0.050663186771009665, + "grad_norm": 1644.5107421875, + "learning_rate": 5.016000000000001e-06, + "loss": 63.1909, + "step": 25080 + }, + { + "epoch": 0.050683387403693485, + "grad_norm": 963.2476196289062, + "learning_rate": 5.018000000000001e-06, + "loss": 55.8948, + "step": 25090 + }, + { + "epoch": 0.0507035880363773, + "grad_norm": 219.97930908203125, + "learning_rate": 5.02e-06, + "loss": 16.093, + "step": 25100 + }, + { + "epoch": 0.05072378866906112, + "grad_norm": 198.54092407226562, + "learning_rate": 5.022e-06, + "loss": 28.0116, + "step": 25110 + }, + { + "epoch": 0.05074398930174493, + "grad_norm": 9.563661575317383, + "learning_rate": 5.024e-06, + "loss": 36.7111, + "step": 25120 + }, + { + "epoch": 0.050764189934428744, + "grad_norm": 410.5440979003906, + "learning_rate": 5.026000000000001e-06, + "loss": 68.4753, + "step": 25130 + }, + { + "epoch": 0.050784390567112564, + "grad_norm": 264.5072326660156, + "learning_rate": 5.028000000000001e-06, + "loss": 30.7421, + "step": 25140 + }, + { + "epoch": 0.05080459119979638, + "grad_norm": 0.0, + "learning_rate": 5.03e-06, + "loss": 45.6329, + "step": 25150 + }, + { + "epoch": 0.05082479183248019, + "grad_norm": 197.56336975097656, + "learning_rate": 5.032e-06, + "loss": 26.0097, + "step": 25160 + }, + { + "epoch": 0.05084499246516401, + "grad_norm": 95.28469848632812, + "learning_rate": 5.0339999999999996e-06, + "loss": 71.7765, + "step": 25170 + }, + { + "epoch": 0.05086519309784782, + "grad_norm": 226.5924072265625, + "learning_rate": 5.0360000000000006e-06, + "loss": 31.2543, + "step": 25180 + }, + { + "epoch": 0.05088539373053164, + "grad_norm": 196.97518920898438, + "learning_rate": 5.038000000000001e-06, + "loss": 22.2428, + "step": 25190 + }, + { + "epoch": 0.050905594363215456, + "grad_norm": 370.3966369628906, + "learning_rate": 5.04e-06, + "loss": 23.8425, + "step": 25200 + }, + { + "epoch": 0.05092579499589927, + "grad_norm": 319.02447509765625, + "learning_rate": 5.042e-06, + "loss": 21.7254, + "step": 25210 + }, + { + "epoch": 0.05094599562858309, + "grad_norm": 281.3335876464844, + "learning_rate": 5.044e-06, + "loss": 41.2796, + "step": 25220 + }, + { + "epoch": 0.0509661962612669, + "grad_norm": 1281.8736572265625, + "learning_rate": 5.0460000000000005e-06, + "loss": 52.3635, + "step": 25230 + }, + { + "epoch": 0.050986396893950715, + "grad_norm": 298.2367858886719, + "learning_rate": 5.048000000000001e-06, + "loss": 41.1969, + "step": 25240 + }, + { + "epoch": 0.051006597526634535, + "grad_norm": 348.9700012207031, + "learning_rate": 5.050000000000001e-06, + "loss": 35.1046, + "step": 25250 + }, + { + "epoch": 0.05102679815931835, + "grad_norm": 377.2474365234375, + "learning_rate": 5.052e-06, + "loss": 24.9522, + "step": 25260 + }, + { + "epoch": 0.05104699879200217, + "grad_norm": 357.2254333496094, + "learning_rate": 5.054e-06, + "loss": 17.7826, + "step": 25270 + }, + { + "epoch": 0.05106719942468598, + "grad_norm": 178.88450622558594, + "learning_rate": 5.056000000000001e-06, + "loss": 32.328, + "step": 25280 + }, + { + "epoch": 0.051087400057369795, + "grad_norm": 73.72966766357422, + "learning_rate": 5.0580000000000005e-06, + "loss": 39.4752, + "step": 25290 + }, + { + "epoch": 0.051107600690053615, + "grad_norm": 0.0, + "learning_rate": 5.060000000000001e-06, + "loss": 42.9825, + "step": 25300 + }, + { + "epoch": 0.05112780132273743, + "grad_norm": 0.0, + "learning_rate": 5.062e-06, + "loss": 19.2751, + "step": 25310 + }, + { + "epoch": 0.05114800195542124, + "grad_norm": 164.6295928955078, + "learning_rate": 5.064e-06, + "loss": 30.3619, + "step": 25320 + }, + { + "epoch": 0.05116820258810506, + "grad_norm": 107.75360107421875, + "learning_rate": 5.066000000000001e-06, + "loss": 36.0318, + "step": 25330 + }, + { + "epoch": 0.051188403220788874, + "grad_norm": 386.89239501953125, + "learning_rate": 5.0680000000000004e-06, + "loss": 33.5402, + "step": 25340 + }, + { + "epoch": 0.051208603853472694, + "grad_norm": 350.92889404296875, + "learning_rate": 5.070000000000001e-06, + "loss": 34.1303, + "step": 25350 + }, + { + "epoch": 0.05122880448615651, + "grad_norm": 494.48809814453125, + "learning_rate": 5.072e-06, + "loss": 28.6663, + "step": 25360 + }, + { + "epoch": 0.05124900511884032, + "grad_norm": 149.0237579345703, + "learning_rate": 5.074e-06, + "loss": 21.9641, + "step": 25370 + }, + { + "epoch": 0.05126920575152414, + "grad_norm": 409.06689453125, + "learning_rate": 5.076000000000001e-06, + "loss": 46.543, + "step": 25380 + }, + { + "epoch": 0.05128940638420795, + "grad_norm": 292.32781982421875, + "learning_rate": 5.078e-06, + "loss": 26.6015, + "step": 25390 + }, + { + "epoch": 0.051309607016891766, + "grad_norm": 115.6833267211914, + "learning_rate": 5.0800000000000005e-06, + "loss": 34.6124, + "step": 25400 + }, + { + "epoch": 0.051329807649575586, + "grad_norm": 152.78005981445312, + "learning_rate": 5.082000000000001e-06, + "loss": 20.7547, + "step": 25410 + }, + { + "epoch": 0.0513500082822594, + "grad_norm": 274.0210876464844, + "learning_rate": 5.084e-06, + "loss": 30.4251, + "step": 25420 + }, + { + "epoch": 0.05137020891494322, + "grad_norm": 287.94451904296875, + "learning_rate": 5.086000000000001e-06, + "loss": 17.8743, + "step": 25430 + }, + { + "epoch": 0.05139040954762703, + "grad_norm": 363.70941162109375, + "learning_rate": 5.088000000000001e-06, + "loss": 20.8506, + "step": 25440 + }, + { + "epoch": 0.051410610180310845, + "grad_norm": 679.3761596679688, + "learning_rate": 5.09e-06, + "loss": 34.454, + "step": 25450 + }, + { + "epoch": 0.051430810812994665, + "grad_norm": 366.37115478515625, + "learning_rate": 5.0920000000000005e-06, + "loss": 26.0402, + "step": 25460 + }, + { + "epoch": 0.05145101144567848, + "grad_norm": 491.2541198730469, + "learning_rate": 5.094e-06, + "loss": 25.1115, + "step": 25470 + }, + { + "epoch": 0.05147121207836229, + "grad_norm": 479.7918701171875, + "learning_rate": 5.096000000000001e-06, + "loss": 30.0957, + "step": 25480 + }, + { + "epoch": 0.05149141271104611, + "grad_norm": 84.84467315673828, + "learning_rate": 5.098000000000001e-06, + "loss": 35.6397, + "step": 25490 + }, + { + "epoch": 0.051511613343729924, + "grad_norm": 741.4186401367188, + "learning_rate": 5.1e-06, + "loss": 19.2858, + "step": 25500 + }, + { + "epoch": 0.051531813976413744, + "grad_norm": 380.09454345703125, + "learning_rate": 5.1020000000000004e-06, + "loss": 45.9541, + "step": 25510 + }, + { + "epoch": 0.05155201460909756, + "grad_norm": 265.50042724609375, + "learning_rate": 5.104e-06, + "loss": 32.1022, + "step": 25520 + }, + { + "epoch": 0.05157221524178137, + "grad_norm": 543.1854858398438, + "learning_rate": 5.106000000000001e-06, + "loss": 47.4233, + "step": 25530 + }, + { + "epoch": 0.05159241587446519, + "grad_norm": 977.0132446289062, + "learning_rate": 5.108000000000001e-06, + "loss": 52.7017, + "step": 25540 + }, + { + "epoch": 0.051612616507149, + "grad_norm": 234.1029815673828, + "learning_rate": 5.11e-06, + "loss": 39.0079, + "step": 25550 + }, + { + "epoch": 0.051632817139832816, + "grad_norm": 133.4276885986328, + "learning_rate": 5.112e-06, + "loss": 38.9247, + "step": 25560 + }, + { + "epoch": 0.051653017772516636, + "grad_norm": 562.444091796875, + "learning_rate": 5.114e-06, + "loss": 47.2719, + "step": 25570 + }, + { + "epoch": 0.05167321840520045, + "grad_norm": 243.87353515625, + "learning_rate": 5.116000000000001e-06, + "loss": 39.1346, + "step": 25580 + }, + { + "epoch": 0.05169341903788427, + "grad_norm": 354.45098876953125, + "learning_rate": 5.118000000000001e-06, + "loss": 57.6305, + "step": 25590 + }, + { + "epoch": 0.05171361967056808, + "grad_norm": 194.29457092285156, + "learning_rate": 5.12e-06, + "loss": 24.6174, + "step": 25600 + }, + { + "epoch": 0.051733820303251896, + "grad_norm": 237.46725463867188, + "learning_rate": 5.122e-06, + "loss": 33.0616, + "step": 25610 + }, + { + "epoch": 0.051754020935935716, + "grad_norm": 120.91265869140625, + "learning_rate": 5.124e-06, + "loss": 57.4991, + "step": 25620 + }, + { + "epoch": 0.05177422156861953, + "grad_norm": 809.0008544921875, + "learning_rate": 5.126e-06, + "loss": 41.0335, + "step": 25630 + }, + { + "epoch": 0.05179442220130334, + "grad_norm": 258.4845275878906, + "learning_rate": 5.128000000000001e-06, + "loss": 26.7815, + "step": 25640 + }, + { + "epoch": 0.05181462283398716, + "grad_norm": 251.044677734375, + "learning_rate": 5.130000000000001e-06, + "loss": 50.3299, + "step": 25650 + }, + { + "epoch": 0.051834823466670975, + "grad_norm": 314.73687744140625, + "learning_rate": 5.132e-06, + "loss": 44.4338, + "step": 25660 + }, + { + "epoch": 0.051855024099354795, + "grad_norm": 120.0523452758789, + "learning_rate": 5.134e-06, + "loss": 29.2697, + "step": 25670 + }, + { + "epoch": 0.05187522473203861, + "grad_norm": 300.96966552734375, + "learning_rate": 5.136e-06, + "loss": 34.9113, + "step": 25680 + }, + { + "epoch": 0.05189542536472242, + "grad_norm": 159.7620849609375, + "learning_rate": 5.138000000000001e-06, + "loss": 22.3714, + "step": 25690 + }, + { + "epoch": 0.05191562599740624, + "grad_norm": 408.8251037597656, + "learning_rate": 5.140000000000001e-06, + "loss": 37.7346, + "step": 25700 + }, + { + "epoch": 0.051935826630090054, + "grad_norm": 1339.5537109375, + "learning_rate": 5.142e-06, + "loss": 30.9084, + "step": 25710 + }, + { + "epoch": 0.05195602726277387, + "grad_norm": 195.84228515625, + "learning_rate": 5.144e-06, + "loss": 21.4031, + "step": 25720 + }, + { + "epoch": 0.05197622789545769, + "grad_norm": 283.7611999511719, + "learning_rate": 5.1459999999999995e-06, + "loss": 31.4553, + "step": 25730 + }, + { + "epoch": 0.0519964285281415, + "grad_norm": 406.22857666015625, + "learning_rate": 5.1480000000000005e-06, + "loss": 34.3794, + "step": 25740 + }, + { + "epoch": 0.05201662916082532, + "grad_norm": 1262.8756103515625, + "learning_rate": 5.150000000000001e-06, + "loss": 36.6906, + "step": 25750 + }, + { + "epoch": 0.05203682979350913, + "grad_norm": 40.364742279052734, + "learning_rate": 5.152e-06, + "loss": 32.7776, + "step": 25760 + }, + { + "epoch": 0.052057030426192946, + "grad_norm": 217.0058135986328, + "learning_rate": 5.154e-06, + "loss": 30.9879, + "step": 25770 + }, + { + "epoch": 0.052077231058876766, + "grad_norm": 1036.8714599609375, + "learning_rate": 5.156e-06, + "loss": 34.4645, + "step": 25780 + }, + { + "epoch": 0.05209743169156058, + "grad_norm": 394.1192626953125, + "learning_rate": 5.158e-06, + "loss": 47.2173, + "step": 25790 + }, + { + "epoch": 0.05211763232424439, + "grad_norm": 172.93893432617188, + "learning_rate": 5.1600000000000006e-06, + "loss": 27.2692, + "step": 25800 + }, + { + "epoch": 0.05213783295692821, + "grad_norm": 331.1111755371094, + "learning_rate": 5.162000000000001e-06, + "loss": 38.2051, + "step": 25810 + }, + { + "epoch": 0.052158033589612025, + "grad_norm": 62.95143508911133, + "learning_rate": 5.164e-06, + "loss": 35.2421, + "step": 25820 + }, + { + "epoch": 0.052178234222295845, + "grad_norm": 206.21441650390625, + "learning_rate": 5.166e-06, + "loss": 38.8744, + "step": 25830 + }, + { + "epoch": 0.05219843485497966, + "grad_norm": 534.4074096679688, + "learning_rate": 5.168000000000001e-06, + "loss": 45.46, + "step": 25840 + }, + { + "epoch": 0.05221863548766347, + "grad_norm": 190.77833557128906, + "learning_rate": 5.1700000000000005e-06, + "loss": 36.677, + "step": 25850 + }, + { + "epoch": 0.05223883612034729, + "grad_norm": 115.88018798828125, + "learning_rate": 5.172000000000001e-06, + "loss": 37.8392, + "step": 25860 + }, + { + "epoch": 0.052259036753031105, + "grad_norm": 408.080078125, + "learning_rate": 5.174e-06, + "loss": 32.2986, + "step": 25870 + }, + { + "epoch": 0.05227923738571492, + "grad_norm": 274.748779296875, + "learning_rate": 5.176e-06, + "loss": 39.0948, + "step": 25880 + }, + { + "epoch": 0.05229943801839874, + "grad_norm": 236.39987182617188, + "learning_rate": 5.178000000000001e-06, + "loss": 29.9548, + "step": 25890 + }, + { + "epoch": 0.05231963865108255, + "grad_norm": 579.6231079101562, + "learning_rate": 5.18e-06, + "loss": 46.9257, + "step": 25900 + }, + { + "epoch": 0.05233983928376637, + "grad_norm": 159.98565673828125, + "learning_rate": 5.1820000000000005e-06, + "loss": 51.2098, + "step": 25910 + }, + { + "epoch": 0.052360039916450184, + "grad_norm": 481.517578125, + "learning_rate": 5.184e-06, + "loss": 38.5569, + "step": 25920 + }, + { + "epoch": 0.052380240549134, + "grad_norm": 218.76170349121094, + "learning_rate": 5.186e-06, + "loss": 34.3429, + "step": 25930 + }, + { + "epoch": 0.05240044118181782, + "grad_norm": 143.18772888183594, + "learning_rate": 5.188000000000001e-06, + "loss": 28.4076, + "step": 25940 + }, + { + "epoch": 0.05242064181450163, + "grad_norm": 273.875244140625, + "learning_rate": 5.19e-06, + "loss": 18.0686, + "step": 25950 + }, + { + "epoch": 0.05244084244718544, + "grad_norm": 219.48605346679688, + "learning_rate": 5.1920000000000004e-06, + "loss": 28.1385, + "step": 25960 + }, + { + "epoch": 0.05246104307986926, + "grad_norm": 32.729549407958984, + "learning_rate": 5.194e-06, + "loss": 14.7589, + "step": 25970 + }, + { + "epoch": 0.052481243712553076, + "grad_norm": 244.2727508544922, + "learning_rate": 5.196e-06, + "loss": 28.1004, + "step": 25980 + }, + { + "epoch": 0.052501444345236896, + "grad_norm": 63.02434539794922, + "learning_rate": 5.198000000000001e-06, + "loss": 29.5537, + "step": 25990 + }, + { + "epoch": 0.05252164497792071, + "grad_norm": 315.0021667480469, + "learning_rate": 5.2e-06, + "loss": 25.2316, + "step": 26000 + }, + { + "epoch": 0.05254184561060452, + "grad_norm": 412.6581115722656, + "learning_rate": 5.202e-06, + "loss": 29.1501, + "step": 26010 + }, + { + "epoch": 0.05256204624328834, + "grad_norm": 495.1465148925781, + "learning_rate": 5.2040000000000005e-06, + "loss": 40.651, + "step": 26020 + }, + { + "epoch": 0.052582246875972155, + "grad_norm": 251.30874633789062, + "learning_rate": 5.206e-06, + "loss": 24.8458, + "step": 26030 + }, + { + "epoch": 0.05260244750865597, + "grad_norm": 340.6131896972656, + "learning_rate": 5.208000000000001e-06, + "loss": 27.7611, + "step": 26040 + }, + { + "epoch": 0.05262264814133979, + "grad_norm": 333.89617919921875, + "learning_rate": 5.210000000000001e-06, + "loss": 19.1768, + "step": 26050 + }, + { + "epoch": 0.0526428487740236, + "grad_norm": 104.84229278564453, + "learning_rate": 5.212e-06, + "loss": 41.2576, + "step": 26060 + }, + { + "epoch": 0.05266304940670742, + "grad_norm": 365.7760925292969, + "learning_rate": 5.214e-06, + "loss": 26.6519, + "step": 26070 + }, + { + "epoch": 0.052683250039391234, + "grad_norm": 535.5447998046875, + "learning_rate": 5.216e-06, + "loss": 33.7481, + "step": 26080 + }, + { + "epoch": 0.05270345067207505, + "grad_norm": 367.01727294921875, + "learning_rate": 5.218000000000001e-06, + "loss": 33.4843, + "step": 26090 + }, + { + "epoch": 0.05272365130475887, + "grad_norm": 122.15428161621094, + "learning_rate": 5.220000000000001e-06, + "loss": 14.3232, + "step": 26100 + }, + { + "epoch": 0.05274385193744268, + "grad_norm": 409.5092468261719, + "learning_rate": 5.222e-06, + "loss": 34.1983, + "step": 26110 + }, + { + "epoch": 0.05276405257012649, + "grad_norm": 290.99560546875, + "learning_rate": 5.224e-06, + "loss": 35.0679, + "step": 26120 + }, + { + "epoch": 0.05278425320281031, + "grad_norm": 5402.544921875, + "learning_rate": 5.226e-06, + "loss": 55.7997, + "step": 26130 + }, + { + "epoch": 0.052804453835494126, + "grad_norm": 0.0, + "learning_rate": 5.228000000000001e-06, + "loss": 54.6845, + "step": 26140 + }, + { + "epoch": 0.052824654468177946, + "grad_norm": 121.34117126464844, + "learning_rate": 5.230000000000001e-06, + "loss": 28.876, + "step": 26150 + }, + { + "epoch": 0.05284485510086176, + "grad_norm": 357.8832092285156, + "learning_rate": 5.232e-06, + "loss": 30.5092, + "step": 26160 + }, + { + "epoch": 0.05286505573354557, + "grad_norm": 311.82489013671875, + "learning_rate": 5.234e-06, + "loss": 39.6837, + "step": 26170 + }, + { + "epoch": 0.05288525636622939, + "grad_norm": 37.93082809448242, + "learning_rate": 5.236e-06, + "loss": 13.6538, + "step": 26180 + }, + { + "epoch": 0.052905456998913206, + "grad_norm": 345.971435546875, + "learning_rate": 5.2380000000000005e-06, + "loss": 24.8791, + "step": 26190 + }, + { + "epoch": 0.05292565763159702, + "grad_norm": 175.8555145263672, + "learning_rate": 5.240000000000001e-06, + "loss": 25.0739, + "step": 26200 + }, + { + "epoch": 0.05294585826428084, + "grad_norm": 614.9837646484375, + "learning_rate": 5.242000000000001e-06, + "loss": 28.5274, + "step": 26210 + }, + { + "epoch": 0.05296605889696465, + "grad_norm": 262.697509765625, + "learning_rate": 5.244e-06, + "loss": 21.9201, + "step": 26220 + }, + { + "epoch": 0.05298625952964847, + "grad_norm": 277.9417724609375, + "learning_rate": 5.246e-06, + "loss": 33.9503, + "step": 26230 + }, + { + "epoch": 0.053006460162332285, + "grad_norm": 508.5185852050781, + "learning_rate": 5.248000000000001e-06, + "loss": 21.1503, + "step": 26240 + }, + { + "epoch": 0.0530266607950161, + "grad_norm": 85.74031066894531, + "learning_rate": 5.2500000000000006e-06, + "loss": 54.1428, + "step": 26250 + }, + { + "epoch": 0.05304686142769992, + "grad_norm": 18.77439308166504, + "learning_rate": 5.252000000000001e-06, + "loss": 34.7449, + "step": 26260 + }, + { + "epoch": 0.05306706206038373, + "grad_norm": 434.8307800292969, + "learning_rate": 5.254e-06, + "loss": 47.4321, + "step": 26270 + }, + { + "epoch": 0.053087262693067544, + "grad_norm": 94.10435485839844, + "learning_rate": 5.256e-06, + "loss": 21.691, + "step": 26280 + }, + { + "epoch": 0.053107463325751364, + "grad_norm": 249.16969299316406, + "learning_rate": 5.258000000000001e-06, + "loss": 27.1049, + "step": 26290 + }, + { + "epoch": 0.05312766395843518, + "grad_norm": 259.4500732421875, + "learning_rate": 5.2600000000000005e-06, + "loss": 41.8547, + "step": 26300 + }, + { + "epoch": 0.053147864591119, + "grad_norm": 117.62743377685547, + "learning_rate": 5.262000000000001e-06, + "loss": 39.8521, + "step": 26310 + }, + { + "epoch": 0.05316806522380281, + "grad_norm": 718.5263061523438, + "learning_rate": 5.264e-06, + "loss": 55.0665, + "step": 26320 + }, + { + "epoch": 0.05318826585648662, + "grad_norm": 386.2872009277344, + "learning_rate": 5.266e-06, + "loss": 31.59, + "step": 26330 + }, + { + "epoch": 0.05320846648917044, + "grad_norm": 241.26043701171875, + "learning_rate": 5.268000000000001e-06, + "loss": 29.2935, + "step": 26340 + }, + { + "epoch": 0.053228667121854256, + "grad_norm": 734.0238647460938, + "learning_rate": 5.27e-06, + "loss": 49.3361, + "step": 26350 + }, + { + "epoch": 0.05324886775453807, + "grad_norm": 773.4956665039062, + "learning_rate": 5.2720000000000005e-06, + "loss": 51.6634, + "step": 26360 + }, + { + "epoch": 0.05326906838722189, + "grad_norm": 118.69739532470703, + "learning_rate": 5.274e-06, + "loss": 28.0177, + "step": 26370 + }, + { + "epoch": 0.0532892690199057, + "grad_norm": 13.86148452758789, + "learning_rate": 5.276e-06, + "loss": 17.8887, + "step": 26380 + }, + { + "epoch": 0.05330946965258952, + "grad_norm": 707.5802001953125, + "learning_rate": 5.278000000000001e-06, + "loss": 22.4515, + "step": 26390 + }, + { + "epoch": 0.053329670285273335, + "grad_norm": 293.2173156738281, + "learning_rate": 5.28e-06, + "loss": 22.0472, + "step": 26400 + }, + { + "epoch": 0.05334987091795715, + "grad_norm": 107.76779174804688, + "learning_rate": 5.282e-06, + "loss": 28.122, + "step": 26410 + }, + { + "epoch": 0.05337007155064097, + "grad_norm": 319.65496826171875, + "learning_rate": 5.2840000000000006e-06, + "loss": 54.3718, + "step": 26420 + }, + { + "epoch": 0.05339027218332478, + "grad_norm": 302.2618103027344, + "learning_rate": 5.286e-06, + "loss": 18.9766, + "step": 26430 + }, + { + "epoch": 0.053410472816008595, + "grad_norm": 124.77485656738281, + "learning_rate": 5.288000000000001e-06, + "loss": 18.7363, + "step": 26440 + }, + { + "epoch": 0.053430673448692415, + "grad_norm": 19.75719451904297, + "learning_rate": 5.290000000000001e-06, + "loss": 32.6725, + "step": 26450 + }, + { + "epoch": 0.05345087408137623, + "grad_norm": 160.94912719726562, + "learning_rate": 5.292e-06, + "loss": 43.9855, + "step": 26460 + }, + { + "epoch": 0.05347107471406005, + "grad_norm": 303.1990661621094, + "learning_rate": 5.2940000000000005e-06, + "loss": 49.4688, + "step": 26470 + }, + { + "epoch": 0.05349127534674386, + "grad_norm": 438.0093078613281, + "learning_rate": 5.296e-06, + "loss": 17.595, + "step": 26480 + }, + { + "epoch": 0.053511475979427674, + "grad_norm": 265.2622985839844, + "learning_rate": 5.298000000000001e-06, + "loss": 34.8372, + "step": 26490 + }, + { + "epoch": 0.053531676612111494, + "grad_norm": 342.2603759765625, + "learning_rate": 5.300000000000001e-06, + "loss": 64.3463, + "step": 26500 + }, + { + "epoch": 0.05355187724479531, + "grad_norm": 297.47625732421875, + "learning_rate": 5.302e-06, + "loss": 24.1028, + "step": 26510 + }, + { + "epoch": 0.05357207787747912, + "grad_norm": 217.04269409179688, + "learning_rate": 5.304e-06, + "loss": 25.0686, + "step": 26520 + }, + { + "epoch": 0.05359227851016294, + "grad_norm": 442.0998840332031, + "learning_rate": 5.306e-06, + "loss": 46.3262, + "step": 26530 + }, + { + "epoch": 0.05361247914284675, + "grad_norm": 326.03790283203125, + "learning_rate": 5.308000000000001e-06, + "loss": 11.7794, + "step": 26540 + }, + { + "epoch": 0.05363267977553057, + "grad_norm": 453.63458251953125, + "learning_rate": 5.310000000000001e-06, + "loss": 39.3804, + "step": 26550 + }, + { + "epoch": 0.053652880408214386, + "grad_norm": 53.374000549316406, + "learning_rate": 5.312e-06, + "loss": 36.1299, + "step": 26560 + }, + { + "epoch": 0.0536730810408982, + "grad_norm": 269.9315490722656, + "learning_rate": 5.314e-06, + "loss": 26.4243, + "step": 26570 + }, + { + "epoch": 0.05369328167358202, + "grad_norm": 386.93377685546875, + "learning_rate": 5.3160000000000004e-06, + "loss": 28.9528, + "step": 26580 + }, + { + "epoch": 0.05371348230626583, + "grad_norm": 253.3940887451172, + "learning_rate": 5.318000000000001e-06, + "loss": 30.4764, + "step": 26590 + }, + { + "epoch": 0.053733682938949645, + "grad_norm": 0.0, + "learning_rate": 5.320000000000001e-06, + "loss": 38.4537, + "step": 26600 + }, + { + "epoch": 0.053753883571633465, + "grad_norm": 417.5376892089844, + "learning_rate": 5.322000000000001e-06, + "loss": 25.8576, + "step": 26610 + }, + { + "epoch": 0.05377408420431728, + "grad_norm": 199.4116668701172, + "learning_rate": 5.324e-06, + "loss": 17.2241, + "step": 26620 + }, + { + "epoch": 0.0537942848370011, + "grad_norm": 743.338134765625, + "learning_rate": 5.326e-06, + "loss": 33.0902, + "step": 26630 + }, + { + "epoch": 0.05381448546968491, + "grad_norm": 156.85171508789062, + "learning_rate": 5.328000000000001e-06, + "loss": 18.8293, + "step": 26640 + }, + { + "epoch": 0.053834686102368724, + "grad_norm": 406.0733947753906, + "learning_rate": 5.330000000000001e-06, + "loss": 47.1567, + "step": 26650 + }, + { + "epoch": 0.053854886735052544, + "grad_norm": 206.8536834716797, + "learning_rate": 5.332000000000001e-06, + "loss": 58.3281, + "step": 26660 + }, + { + "epoch": 0.05387508736773636, + "grad_norm": 291.2559814453125, + "learning_rate": 5.334e-06, + "loss": 35.1946, + "step": 26670 + }, + { + "epoch": 0.05389528800042017, + "grad_norm": 275.2223815917969, + "learning_rate": 5.336e-06, + "loss": 37.8603, + "step": 26680 + }, + { + "epoch": 0.05391548863310399, + "grad_norm": 283.79132080078125, + "learning_rate": 5.338000000000001e-06, + "loss": 39.2534, + "step": 26690 + }, + { + "epoch": 0.0539356892657878, + "grad_norm": 126.51179504394531, + "learning_rate": 5.3400000000000005e-06, + "loss": 18.4498, + "step": 26700 + }, + { + "epoch": 0.05395588989847162, + "grad_norm": 298.1804504394531, + "learning_rate": 5.342000000000001e-06, + "loss": 44.271, + "step": 26710 + }, + { + "epoch": 0.053976090531155436, + "grad_norm": 400.8433532714844, + "learning_rate": 5.344e-06, + "loss": 29.7003, + "step": 26720 + }, + { + "epoch": 0.05399629116383925, + "grad_norm": 219.6957550048828, + "learning_rate": 5.346e-06, + "loss": 33.9938, + "step": 26730 + }, + { + "epoch": 0.05401649179652307, + "grad_norm": 347.8514099121094, + "learning_rate": 5.348000000000001e-06, + "loss": 42.7821, + "step": 26740 + }, + { + "epoch": 0.05403669242920688, + "grad_norm": 539.4111328125, + "learning_rate": 5.3500000000000004e-06, + "loss": 19.2532, + "step": 26750 + }, + { + "epoch": 0.054056893061890696, + "grad_norm": 515.8953247070312, + "learning_rate": 5.352000000000001e-06, + "loss": 37.248, + "step": 26760 + }, + { + "epoch": 0.054077093694574516, + "grad_norm": 1480.735595703125, + "learning_rate": 5.354e-06, + "loss": 29.1723, + "step": 26770 + }, + { + "epoch": 0.05409729432725833, + "grad_norm": 350.07733154296875, + "learning_rate": 5.356e-06, + "loss": 24.7955, + "step": 26780 + }, + { + "epoch": 0.05411749495994215, + "grad_norm": 105.70265197753906, + "learning_rate": 5.358000000000001e-06, + "loss": 21.997, + "step": 26790 + }, + { + "epoch": 0.05413769559262596, + "grad_norm": 298.6070556640625, + "learning_rate": 5.36e-06, + "loss": 24.973, + "step": 26800 + }, + { + "epoch": 0.054157896225309775, + "grad_norm": 826.4459838867188, + "learning_rate": 5.3620000000000005e-06, + "loss": 44.188, + "step": 26810 + }, + { + "epoch": 0.054178096857993595, + "grad_norm": 761.2321166992188, + "learning_rate": 5.364000000000001e-06, + "loss": 23.6996, + "step": 26820 + }, + { + "epoch": 0.05419829749067741, + "grad_norm": 198.7512969970703, + "learning_rate": 5.366e-06, + "loss": 31.6769, + "step": 26830 + }, + { + "epoch": 0.05421849812336122, + "grad_norm": 244.68893432617188, + "learning_rate": 5.368000000000001e-06, + "loss": 48.2041, + "step": 26840 + }, + { + "epoch": 0.05423869875604504, + "grad_norm": 307.557373046875, + "learning_rate": 5.370000000000001e-06, + "loss": 23.0467, + "step": 26850 + }, + { + "epoch": 0.054258899388728854, + "grad_norm": 324.9233703613281, + "learning_rate": 5.372e-06, + "loss": 20.549, + "step": 26860 + }, + { + "epoch": 0.054279100021412674, + "grad_norm": 232.0588836669922, + "learning_rate": 5.3740000000000006e-06, + "loss": 27.1651, + "step": 26870 + }, + { + "epoch": 0.05429930065409649, + "grad_norm": 227.4842071533203, + "learning_rate": 5.376e-06, + "loss": 29.5139, + "step": 26880 + }, + { + "epoch": 0.0543195012867803, + "grad_norm": 323.15338134765625, + "learning_rate": 5.378e-06, + "loss": 48.1717, + "step": 26890 + }, + { + "epoch": 0.05433970191946412, + "grad_norm": 225.5341033935547, + "learning_rate": 5.380000000000001e-06, + "loss": 31.1154, + "step": 26900 + }, + { + "epoch": 0.05435990255214793, + "grad_norm": 504.28497314453125, + "learning_rate": 5.382e-06, + "loss": 26.7281, + "step": 26910 + }, + { + "epoch": 0.054380103184831746, + "grad_norm": 144.5900115966797, + "learning_rate": 5.3840000000000005e-06, + "loss": 23.0646, + "step": 26920 + }, + { + "epoch": 0.054400303817515566, + "grad_norm": 251.57594299316406, + "learning_rate": 5.386e-06, + "loss": 33.4303, + "step": 26930 + }, + { + "epoch": 0.05442050445019938, + "grad_norm": 930.7855834960938, + "learning_rate": 5.388e-06, + "loss": 37.4148, + "step": 26940 + }, + { + "epoch": 0.0544407050828832, + "grad_norm": 309.0507507324219, + "learning_rate": 5.390000000000001e-06, + "loss": 29.1236, + "step": 26950 + }, + { + "epoch": 0.05446090571556701, + "grad_norm": 399.0177001953125, + "learning_rate": 5.392e-06, + "loss": 33.7618, + "step": 26960 + }, + { + "epoch": 0.054481106348250825, + "grad_norm": 568.4434204101562, + "learning_rate": 5.394e-06, + "loss": 42.4966, + "step": 26970 + }, + { + "epoch": 0.054501306980934645, + "grad_norm": 78.31511688232422, + "learning_rate": 5.3960000000000005e-06, + "loss": 21.6994, + "step": 26980 + }, + { + "epoch": 0.05452150761361846, + "grad_norm": 203.22230529785156, + "learning_rate": 5.398e-06, + "loss": 57.3447, + "step": 26990 + }, + { + "epoch": 0.05454170824630227, + "grad_norm": 316.94281005859375, + "learning_rate": 5.400000000000001e-06, + "loss": 21.6052, + "step": 27000 + }, + { + "epoch": 0.05456190887898609, + "grad_norm": 349.6338195800781, + "learning_rate": 5.402000000000001e-06, + "loss": 25.8364, + "step": 27010 + }, + { + "epoch": 0.054582109511669905, + "grad_norm": 269.2860412597656, + "learning_rate": 5.404e-06, + "loss": 36.8285, + "step": 27020 + }, + { + "epoch": 0.054602310144353725, + "grad_norm": 590.0916748046875, + "learning_rate": 5.406e-06, + "loss": 30.6549, + "step": 27030 + }, + { + "epoch": 0.05462251077703754, + "grad_norm": 163.40185546875, + "learning_rate": 5.408e-06, + "loss": 16.0808, + "step": 27040 + }, + { + "epoch": 0.05464271140972135, + "grad_norm": 426.44573974609375, + "learning_rate": 5.410000000000001e-06, + "loss": 22.1718, + "step": 27050 + }, + { + "epoch": 0.05466291204240517, + "grad_norm": 99.6770248413086, + "learning_rate": 5.412000000000001e-06, + "loss": 24.1849, + "step": 27060 + }, + { + "epoch": 0.054683112675088984, + "grad_norm": 481.256591796875, + "learning_rate": 5.414e-06, + "loss": 38.6982, + "step": 27070 + }, + { + "epoch": 0.0547033133077728, + "grad_norm": 145.97348022460938, + "learning_rate": 5.416e-06, + "loss": 38.3321, + "step": 27080 + }, + { + "epoch": 0.05472351394045662, + "grad_norm": 244.0701446533203, + "learning_rate": 5.418e-06, + "loss": 44.1002, + "step": 27090 + }, + { + "epoch": 0.05474371457314043, + "grad_norm": 66.1285171508789, + "learning_rate": 5.420000000000001e-06, + "loss": 41.6084, + "step": 27100 + }, + { + "epoch": 0.05476391520582425, + "grad_norm": 471.6724548339844, + "learning_rate": 5.422000000000001e-06, + "loss": 25.6201, + "step": 27110 + }, + { + "epoch": 0.05478411583850806, + "grad_norm": 116.57247161865234, + "learning_rate": 5.424e-06, + "loss": 30.7712, + "step": 27120 + }, + { + "epoch": 0.054804316471191876, + "grad_norm": 235.6954803466797, + "learning_rate": 5.426e-06, + "loss": 25.578, + "step": 27130 + }, + { + "epoch": 0.054824517103875696, + "grad_norm": 229.59060668945312, + "learning_rate": 5.4279999999999995e-06, + "loss": 38.1911, + "step": 27140 + }, + { + "epoch": 0.05484471773655951, + "grad_norm": 369.60626220703125, + "learning_rate": 5.4300000000000005e-06, + "loss": 15.9542, + "step": 27150 + }, + { + "epoch": 0.05486491836924332, + "grad_norm": 556.3214721679688, + "learning_rate": 5.432000000000001e-06, + "loss": 25.2471, + "step": 27160 + }, + { + "epoch": 0.05488511900192714, + "grad_norm": 453.157958984375, + "learning_rate": 5.434e-06, + "loss": 39.5062, + "step": 27170 + }, + { + "epoch": 0.054905319634610955, + "grad_norm": 239.89064025878906, + "learning_rate": 5.436e-06, + "loss": 25.2629, + "step": 27180 + }, + { + "epoch": 0.054925520267294775, + "grad_norm": 190.35305786132812, + "learning_rate": 5.438e-06, + "loss": 18.187, + "step": 27190 + }, + { + "epoch": 0.05494572089997859, + "grad_norm": 395.3410339355469, + "learning_rate": 5.4400000000000004e-06, + "loss": 25.4592, + "step": 27200 + }, + { + "epoch": 0.0549659215326624, + "grad_norm": 688.203125, + "learning_rate": 5.442000000000001e-06, + "loss": 63.9065, + "step": 27210 + }, + { + "epoch": 0.05498612216534622, + "grad_norm": 192.1884002685547, + "learning_rate": 5.444000000000001e-06, + "loss": 40.2984, + "step": 27220 + }, + { + "epoch": 0.055006322798030034, + "grad_norm": 211.1009521484375, + "learning_rate": 5.446e-06, + "loss": 29.2781, + "step": 27230 + }, + { + "epoch": 0.05502652343071385, + "grad_norm": 109.21246337890625, + "learning_rate": 5.448e-06, + "loss": 17.4171, + "step": 27240 + }, + { + "epoch": 0.05504672406339767, + "grad_norm": 353.7481994628906, + "learning_rate": 5.450000000000001e-06, + "loss": 45.4631, + "step": 27250 + }, + { + "epoch": 0.05506692469608148, + "grad_norm": 745.6088256835938, + "learning_rate": 5.4520000000000005e-06, + "loss": 32.752, + "step": 27260 + }, + { + "epoch": 0.0550871253287653, + "grad_norm": 208.9687957763672, + "learning_rate": 5.454000000000001e-06, + "loss": 13.6371, + "step": 27270 + }, + { + "epoch": 0.05510732596144911, + "grad_norm": 350.83941650390625, + "learning_rate": 5.456e-06, + "loss": 43.5445, + "step": 27280 + }, + { + "epoch": 0.055127526594132926, + "grad_norm": 341.1553649902344, + "learning_rate": 5.458e-06, + "loss": 38.8892, + "step": 27290 + }, + { + "epoch": 0.055147727226816746, + "grad_norm": 359.916748046875, + "learning_rate": 5.460000000000001e-06, + "loss": 44.6945, + "step": 27300 + }, + { + "epoch": 0.05516792785950056, + "grad_norm": 38.90704345703125, + "learning_rate": 5.462e-06, + "loss": 14.8064, + "step": 27310 + }, + { + "epoch": 0.05518812849218437, + "grad_norm": 278.1080322265625, + "learning_rate": 5.4640000000000005e-06, + "loss": 63.8273, + "step": 27320 + }, + { + "epoch": 0.05520832912486819, + "grad_norm": 436.64990234375, + "learning_rate": 5.466e-06, + "loss": 33.218, + "step": 27330 + }, + { + "epoch": 0.055228529757552006, + "grad_norm": 314.9707946777344, + "learning_rate": 5.468e-06, + "loss": 22.9617, + "step": 27340 + }, + { + "epoch": 0.05524873039023582, + "grad_norm": 554.5739135742188, + "learning_rate": 5.470000000000001e-06, + "loss": 18.1486, + "step": 27350 + }, + { + "epoch": 0.05526893102291964, + "grad_norm": 226.20201110839844, + "learning_rate": 5.472e-06, + "loss": 17.9212, + "step": 27360 + }, + { + "epoch": 0.05528913165560345, + "grad_norm": 234.67518615722656, + "learning_rate": 5.4740000000000004e-06, + "loss": 37.0259, + "step": 27370 + }, + { + "epoch": 0.05530933228828727, + "grad_norm": 259.5121765136719, + "learning_rate": 5.476000000000001e-06, + "loss": 25.7845, + "step": 27380 + }, + { + "epoch": 0.055329532920971085, + "grad_norm": 378.53460693359375, + "learning_rate": 5.478e-06, + "loss": 38.2042, + "step": 27390 + }, + { + "epoch": 0.0553497335536549, + "grad_norm": 247.5023956298828, + "learning_rate": 5.480000000000001e-06, + "loss": 33.0884, + "step": 27400 + }, + { + "epoch": 0.05536993418633872, + "grad_norm": 491.0729675292969, + "learning_rate": 5.482000000000001e-06, + "loss": 26.6517, + "step": 27410 + }, + { + "epoch": 0.05539013481902253, + "grad_norm": 308.5608825683594, + "learning_rate": 5.484e-06, + "loss": 26.3782, + "step": 27420 + }, + { + "epoch": 0.055410335451706344, + "grad_norm": 361.5599365234375, + "learning_rate": 5.4860000000000005e-06, + "loss": 31.672, + "step": 27430 + }, + { + "epoch": 0.055430536084390164, + "grad_norm": 293.3143310546875, + "learning_rate": 5.488e-06, + "loss": 26.1239, + "step": 27440 + }, + { + "epoch": 0.05545073671707398, + "grad_norm": 238.87124633789062, + "learning_rate": 5.490000000000001e-06, + "loss": 17.9067, + "step": 27450 + }, + { + "epoch": 0.0554709373497578, + "grad_norm": 300.4674072265625, + "learning_rate": 5.492000000000001e-06, + "loss": 22.1399, + "step": 27460 + }, + { + "epoch": 0.05549113798244161, + "grad_norm": 1151.210693359375, + "learning_rate": 5.494e-06, + "loss": 31.8923, + "step": 27470 + }, + { + "epoch": 0.05551133861512542, + "grad_norm": 398.8284912109375, + "learning_rate": 5.496e-06, + "loss": 34.124, + "step": 27480 + }, + { + "epoch": 0.05553153924780924, + "grad_norm": 182.739990234375, + "learning_rate": 5.498e-06, + "loss": 16.2706, + "step": 27490 + }, + { + "epoch": 0.055551739880493056, + "grad_norm": 69.2137451171875, + "learning_rate": 5.500000000000001e-06, + "loss": 43.4489, + "step": 27500 + }, + { + "epoch": 0.05557194051317687, + "grad_norm": 243.2827911376953, + "learning_rate": 5.502000000000001e-06, + "loss": 26.3211, + "step": 27510 + }, + { + "epoch": 0.05559214114586069, + "grad_norm": 877.0611572265625, + "learning_rate": 5.504e-06, + "loss": 35.1816, + "step": 27520 + }, + { + "epoch": 0.0556123417785445, + "grad_norm": 125.4283447265625, + "learning_rate": 5.506e-06, + "loss": 15.6442, + "step": 27530 + }, + { + "epoch": 0.05563254241122832, + "grad_norm": 243.79249572753906, + "learning_rate": 5.508e-06, + "loss": 11.2258, + "step": 27540 + }, + { + "epoch": 0.055652743043912135, + "grad_norm": 404.4970397949219, + "learning_rate": 5.510000000000001e-06, + "loss": 28.2651, + "step": 27550 + }, + { + "epoch": 0.05567294367659595, + "grad_norm": 287.2158508300781, + "learning_rate": 5.512000000000001e-06, + "loss": 23.4717, + "step": 27560 + }, + { + "epoch": 0.05569314430927977, + "grad_norm": 57.77788543701172, + "learning_rate": 5.514e-06, + "loss": 25.1778, + "step": 27570 + }, + { + "epoch": 0.05571334494196358, + "grad_norm": 80.17808532714844, + "learning_rate": 5.516e-06, + "loss": 30.3234, + "step": 27580 + }, + { + "epoch": 0.055733545574647395, + "grad_norm": 370.5777893066406, + "learning_rate": 5.518e-06, + "loss": 44.0101, + "step": 27590 + }, + { + "epoch": 0.055753746207331215, + "grad_norm": 248.14292907714844, + "learning_rate": 5.5200000000000005e-06, + "loss": 34.1338, + "step": 27600 + }, + { + "epoch": 0.05577394684001503, + "grad_norm": 297.3516845703125, + "learning_rate": 5.522000000000001e-06, + "loss": 22.6625, + "step": 27610 + }, + { + "epoch": 0.05579414747269885, + "grad_norm": 151.025146484375, + "learning_rate": 5.524000000000001e-06, + "loss": 20.9243, + "step": 27620 + }, + { + "epoch": 0.05581434810538266, + "grad_norm": 679.7206420898438, + "learning_rate": 5.526e-06, + "loss": 40.1341, + "step": 27630 + }, + { + "epoch": 0.055834548738066474, + "grad_norm": 2318.187744140625, + "learning_rate": 5.528e-06, + "loss": 28.5167, + "step": 27640 + }, + { + "epoch": 0.055854749370750294, + "grad_norm": 308.1387023925781, + "learning_rate": 5.530000000000001e-06, + "loss": 47.0316, + "step": 27650 + }, + { + "epoch": 0.05587495000343411, + "grad_norm": 67.56517028808594, + "learning_rate": 5.5320000000000006e-06, + "loss": 23.0679, + "step": 27660 + }, + { + "epoch": 0.05589515063611792, + "grad_norm": 924.32080078125, + "learning_rate": 5.534000000000001e-06, + "loss": 25.1727, + "step": 27670 + }, + { + "epoch": 0.05591535126880174, + "grad_norm": 282.2278137207031, + "learning_rate": 5.536e-06, + "loss": 38.4195, + "step": 27680 + }, + { + "epoch": 0.05593555190148555, + "grad_norm": 378.9815979003906, + "learning_rate": 5.538e-06, + "loss": 27.4518, + "step": 27690 + }, + { + "epoch": 0.05595575253416937, + "grad_norm": 163.6479949951172, + "learning_rate": 5.540000000000001e-06, + "loss": 37.4892, + "step": 27700 + }, + { + "epoch": 0.055975953166853186, + "grad_norm": 88.61754608154297, + "learning_rate": 5.5420000000000005e-06, + "loss": 25.9047, + "step": 27710 + }, + { + "epoch": 0.055996153799537, + "grad_norm": 241.44847106933594, + "learning_rate": 5.544000000000001e-06, + "loss": 36.617, + "step": 27720 + }, + { + "epoch": 0.05601635443222082, + "grad_norm": 285.30438232421875, + "learning_rate": 5.546e-06, + "loss": 31.8549, + "step": 27730 + }, + { + "epoch": 0.05603655506490463, + "grad_norm": 136.18719482421875, + "learning_rate": 5.548e-06, + "loss": 43.947, + "step": 27740 + }, + { + "epoch": 0.056056755697588445, + "grad_norm": 493.2935791015625, + "learning_rate": 5.550000000000001e-06, + "loss": 33.054, + "step": 27750 + }, + { + "epoch": 0.056076956330272265, + "grad_norm": 381.3257751464844, + "learning_rate": 5.552e-06, + "loss": 35.3583, + "step": 27760 + }, + { + "epoch": 0.05609715696295608, + "grad_norm": 93.75751495361328, + "learning_rate": 5.5540000000000005e-06, + "loss": 15.3817, + "step": 27770 + }, + { + "epoch": 0.0561173575956399, + "grad_norm": 337.303466796875, + "learning_rate": 5.556000000000001e-06, + "loss": 24.3953, + "step": 27780 + }, + { + "epoch": 0.05613755822832371, + "grad_norm": 145.49217224121094, + "learning_rate": 5.558e-06, + "loss": 19.2551, + "step": 27790 + }, + { + "epoch": 0.056157758861007524, + "grad_norm": 69.3620834350586, + "learning_rate": 5.560000000000001e-06, + "loss": 34.3316, + "step": 27800 + }, + { + "epoch": 0.056177959493691344, + "grad_norm": 293.1623229980469, + "learning_rate": 5.562000000000001e-06, + "loss": 22.2989, + "step": 27810 + }, + { + "epoch": 0.05619816012637516, + "grad_norm": 157.4688720703125, + "learning_rate": 5.5640000000000004e-06, + "loss": 34.6148, + "step": 27820 + }, + { + "epoch": 0.05621836075905897, + "grad_norm": 1601.130859375, + "learning_rate": 5.566000000000001e-06, + "loss": 43.8286, + "step": 27830 + }, + { + "epoch": 0.05623856139174279, + "grad_norm": 205.823486328125, + "learning_rate": 5.568e-06, + "loss": 45.9922, + "step": 27840 + }, + { + "epoch": 0.0562587620244266, + "grad_norm": 59.5796012878418, + "learning_rate": 5.570000000000001e-06, + "loss": 24.7914, + "step": 27850 + }, + { + "epoch": 0.05627896265711042, + "grad_norm": 333.1473083496094, + "learning_rate": 5.572000000000001e-06, + "loss": 30.5427, + "step": 27860 + }, + { + "epoch": 0.056299163289794237, + "grad_norm": 324.0419616699219, + "learning_rate": 5.574e-06, + "loss": 34.5958, + "step": 27870 + }, + { + "epoch": 0.05631936392247805, + "grad_norm": 227.6458282470703, + "learning_rate": 5.5760000000000005e-06, + "loss": 23.0325, + "step": 27880 + }, + { + "epoch": 0.05633956455516187, + "grad_norm": 352.4715576171875, + "learning_rate": 5.578e-06, + "loss": 29.4214, + "step": 27890 + }, + { + "epoch": 0.05635976518784568, + "grad_norm": 128.63223266601562, + "learning_rate": 5.580000000000001e-06, + "loss": 26.9033, + "step": 27900 + }, + { + "epoch": 0.056379965820529496, + "grad_norm": 295.8462829589844, + "learning_rate": 5.582000000000001e-06, + "loss": 30.203, + "step": 27910 + }, + { + "epoch": 0.056400166453213316, + "grad_norm": 153.4561767578125, + "learning_rate": 5.584e-06, + "loss": 28.914, + "step": 27920 + }, + { + "epoch": 0.05642036708589713, + "grad_norm": 420.8994445800781, + "learning_rate": 5.586e-06, + "loss": 28.6144, + "step": 27930 + }, + { + "epoch": 0.05644056771858095, + "grad_norm": 219.6123046875, + "learning_rate": 5.588e-06, + "loss": 31.8929, + "step": 27940 + }, + { + "epoch": 0.05646076835126476, + "grad_norm": 134.90292358398438, + "learning_rate": 5.590000000000001e-06, + "loss": 40.4273, + "step": 27950 + }, + { + "epoch": 0.056480968983948575, + "grad_norm": 523.1329956054688, + "learning_rate": 5.592000000000001e-06, + "loss": 18.6551, + "step": 27960 + }, + { + "epoch": 0.056501169616632395, + "grad_norm": 512.2754516601562, + "learning_rate": 5.594e-06, + "loss": 47.292, + "step": 27970 + }, + { + "epoch": 0.05652137024931621, + "grad_norm": 55.876888275146484, + "learning_rate": 5.596e-06, + "loss": 31.8431, + "step": 27980 + }, + { + "epoch": 0.05654157088200002, + "grad_norm": 181.684814453125, + "learning_rate": 5.5980000000000004e-06, + "loss": 30.0193, + "step": 27990 + }, + { + "epoch": 0.05656177151468384, + "grad_norm": 204.7184295654297, + "learning_rate": 5.600000000000001e-06, + "loss": 19.8612, + "step": 28000 + }, + { + "epoch": 0.056581972147367654, + "grad_norm": 0.0, + "learning_rate": 5.602000000000001e-06, + "loss": 45.8665, + "step": 28010 + }, + { + "epoch": 0.056602172780051474, + "grad_norm": 463.9403381347656, + "learning_rate": 5.604000000000001e-06, + "loss": 45.7898, + "step": 28020 + }, + { + "epoch": 0.05662237341273529, + "grad_norm": 321.863037109375, + "learning_rate": 5.606e-06, + "loss": 28.8418, + "step": 28030 + }, + { + "epoch": 0.0566425740454191, + "grad_norm": 446.02069091796875, + "learning_rate": 5.608e-06, + "loss": 26.2253, + "step": 28040 + }, + { + "epoch": 0.05666277467810292, + "grad_norm": 254.49205017089844, + "learning_rate": 5.610000000000001e-06, + "loss": 32.8853, + "step": 28050 + }, + { + "epoch": 0.05668297531078673, + "grad_norm": 439.8501892089844, + "learning_rate": 5.612000000000001e-06, + "loss": 26.3658, + "step": 28060 + }, + { + "epoch": 0.056703175943470546, + "grad_norm": 306.6741638183594, + "learning_rate": 5.614000000000001e-06, + "loss": 37.8322, + "step": 28070 + }, + { + "epoch": 0.056723376576154366, + "grad_norm": 260.4828796386719, + "learning_rate": 5.616e-06, + "loss": 29.4606, + "step": 28080 + }, + { + "epoch": 0.05674357720883818, + "grad_norm": 72.44647979736328, + "learning_rate": 5.618e-06, + "loss": 30.5602, + "step": 28090 + }, + { + "epoch": 0.056763777841522, + "grad_norm": 290.8236999511719, + "learning_rate": 5.620000000000001e-06, + "loss": 30.3998, + "step": 28100 + }, + { + "epoch": 0.05678397847420581, + "grad_norm": 102.21273803710938, + "learning_rate": 5.6220000000000006e-06, + "loss": 37.064, + "step": 28110 + }, + { + "epoch": 0.056804179106889625, + "grad_norm": 379.03729248046875, + "learning_rate": 5.624000000000001e-06, + "loss": 35.541, + "step": 28120 + }, + { + "epoch": 0.056824379739573445, + "grad_norm": 236.905517578125, + "learning_rate": 5.626e-06, + "loss": 42.933, + "step": 28130 + }, + { + "epoch": 0.05684458037225726, + "grad_norm": 582.6843872070312, + "learning_rate": 5.628e-06, + "loss": 37.1196, + "step": 28140 + }, + { + "epoch": 0.05686478100494107, + "grad_norm": 565.9315185546875, + "learning_rate": 5.63e-06, + "loss": 24.5229, + "step": 28150 + }, + { + "epoch": 0.05688498163762489, + "grad_norm": 124.73725891113281, + "learning_rate": 5.6320000000000005e-06, + "loss": 18.64, + "step": 28160 + }, + { + "epoch": 0.056905182270308705, + "grad_norm": 29.175222396850586, + "learning_rate": 5.634000000000001e-06, + "loss": 33.4167, + "step": 28170 + }, + { + "epoch": 0.056925382902992525, + "grad_norm": 493.8686218261719, + "learning_rate": 5.636000000000001e-06, + "loss": 16.7518, + "step": 28180 + }, + { + "epoch": 0.05694558353567634, + "grad_norm": 473.3516845703125, + "learning_rate": 5.638e-06, + "loss": 38.6327, + "step": 28190 + }, + { + "epoch": 0.05696578416836015, + "grad_norm": 506.81524658203125, + "learning_rate": 5.64e-06, + "loss": 54.423, + "step": 28200 + }, + { + "epoch": 0.05698598480104397, + "grad_norm": 187.45361328125, + "learning_rate": 5.642000000000001e-06, + "loss": 28.4133, + "step": 28210 + }, + { + "epoch": 0.057006185433727784, + "grad_norm": 1286.269287109375, + "learning_rate": 5.6440000000000005e-06, + "loss": 30.785, + "step": 28220 + }, + { + "epoch": 0.0570263860664116, + "grad_norm": 155.1964111328125, + "learning_rate": 5.646000000000001e-06, + "loss": 32.1819, + "step": 28230 + }, + { + "epoch": 0.05704658669909542, + "grad_norm": 142.5745391845703, + "learning_rate": 5.648e-06, + "loss": 26.0579, + "step": 28240 + }, + { + "epoch": 0.05706678733177923, + "grad_norm": 382.9991455078125, + "learning_rate": 5.65e-06, + "loss": 36.7989, + "step": 28250 + }, + { + "epoch": 0.05708698796446305, + "grad_norm": 311.9433288574219, + "learning_rate": 5.652000000000001e-06, + "loss": 30.9128, + "step": 28260 + }, + { + "epoch": 0.05710718859714686, + "grad_norm": 821.916259765625, + "learning_rate": 5.654e-06, + "loss": 61.4871, + "step": 28270 + }, + { + "epoch": 0.057127389229830676, + "grad_norm": 742.4916381835938, + "learning_rate": 5.6560000000000006e-06, + "loss": 32.3058, + "step": 28280 + }, + { + "epoch": 0.057147589862514496, + "grad_norm": 90.88944244384766, + "learning_rate": 5.658e-06, + "loss": 27.2073, + "step": 28290 + }, + { + "epoch": 0.05716779049519831, + "grad_norm": 63.00052261352539, + "learning_rate": 5.66e-06, + "loss": 27.3424, + "step": 28300 + }, + { + "epoch": 0.05718799112788212, + "grad_norm": 67.73237609863281, + "learning_rate": 5.662000000000001e-06, + "loss": 35.9595, + "step": 28310 + }, + { + "epoch": 0.05720819176056594, + "grad_norm": 78.1071548461914, + "learning_rate": 5.664e-06, + "loss": 9.6641, + "step": 28320 + }, + { + "epoch": 0.057228392393249755, + "grad_norm": 901.3255004882812, + "learning_rate": 5.6660000000000005e-06, + "loss": 29.1247, + "step": 28330 + }, + { + "epoch": 0.057248593025933575, + "grad_norm": 631.9803466796875, + "learning_rate": 5.668e-06, + "loss": 32.6025, + "step": 28340 + }, + { + "epoch": 0.05726879365861739, + "grad_norm": 269.4149169921875, + "learning_rate": 5.67e-06, + "loss": 25.6203, + "step": 28350 + }, + { + "epoch": 0.0572889942913012, + "grad_norm": 665.5552978515625, + "learning_rate": 5.672000000000001e-06, + "loss": 27.0234, + "step": 28360 + }, + { + "epoch": 0.05730919492398502, + "grad_norm": 1237.1932373046875, + "learning_rate": 5.674e-06, + "loss": 41.6504, + "step": 28370 + }, + { + "epoch": 0.057329395556668834, + "grad_norm": 165.74679565429688, + "learning_rate": 5.676e-06, + "loss": 22.0172, + "step": 28380 + }, + { + "epoch": 0.05734959618935265, + "grad_norm": 355.13836669921875, + "learning_rate": 5.6780000000000005e-06, + "loss": 16.2022, + "step": 28390 + }, + { + "epoch": 0.05736979682203647, + "grad_norm": 275.1878662109375, + "learning_rate": 5.68e-06, + "loss": 22.4103, + "step": 28400 + }, + { + "epoch": 0.05738999745472028, + "grad_norm": 129.44248962402344, + "learning_rate": 5.682000000000001e-06, + "loss": 11.4887, + "step": 28410 + }, + { + "epoch": 0.0574101980874041, + "grad_norm": 272.0674743652344, + "learning_rate": 5.684000000000001e-06, + "loss": 48.8762, + "step": 28420 + }, + { + "epoch": 0.05743039872008791, + "grad_norm": 138.18812561035156, + "learning_rate": 5.686e-06, + "loss": 33.4573, + "step": 28430 + }, + { + "epoch": 0.057450599352771727, + "grad_norm": 180.22451782226562, + "learning_rate": 5.6880000000000004e-06, + "loss": 18.0441, + "step": 28440 + }, + { + "epoch": 0.057470799985455547, + "grad_norm": 147.80079650878906, + "learning_rate": 5.69e-06, + "loss": 20.8648, + "step": 28450 + }, + { + "epoch": 0.05749100061813936, + "grad_norm": 251.6083221435547, + "learning_rate": 5.692000000000001e-06, + "loss": 31.337, + "step": 28460 + }, + { + "epoch": 0.05751120125082317, + "grad_norm": 408.03497314453125, + "learning_rate": 5.694000000000001e-06, + "loss": 43.0213, + "step": 28470 + }, + { + "epoch": 0.05753140188350699, + "grad_norm": 310.3747863769531, + "learning_rate": 5.696e-06, + "loss": 20.943, + "step": 28480 + }, + { + "epoch": 0.057551602516190806, + "grad_norm": 0.0, + "learning_rate": 5.698e-06, + "loss": 30.0415, + "step": 28490 + }, + { + "epoch": 0.057571803148874626, + "grad_norm": 281.2377014160156, + "learning_rate": 5.7e-06, + "loss": 31.7942, + "step": 28500 + }, + { + "epoch": 0.05759200378155844, + "grad_norm": 301.1387023925781, + "learning_rate": 5.702000000000001e-06, + "loss": 36.2539, + "step": 28510 + }, + { + "epoch": 0.05761220441424225, + "grad_norm": 426.962158203125, + "learning_rate": 5.704000000000001e-06, + "loss": 34.8207, + "step": 28520 + }, + { + "epoch": 0.05763240504692607, + "grad_norm": 122.75354766845703, + "learning_rate": 5.706e-06, + "loss": 25.3265, + "step": 28530 + }, + { + "epoch": 0.057652605679609885, + "grad_norm": 35.92996597290039, + "learning_rate": 5.708e-06, + "loss": 30.0774, + "step": 28540 + }, + { + "epoch": 0.0576728063122937, + "grad_norm": 271.5851745605469, + "learning_rate": 5.71e-06, + "loss": 29.5664, + "step": 28550 + }, + { + "epoch": 0.05769300694497752, + "grad_norm": 82.54947662353516, + "learning_rate": 5.7120000000000005e-06, + "loss": 33.1574, + "step": 28560 + }, + { + "epoch": 0.05771320757766133, + "grad_norm": 158.8838653564453, + "learning_rate": 5.714000000000001e-06, + "loss": 31.1948, + "step": 28570 + }, + { + "epoch": 0.05773340821034515, + "grad_norm": 90.04515075683594, + "learning_rate": 5.716000000000001e-06, + "loss": 58.9537, + "step": 28580 + }, + { + "epoch": 0.057753608843028964, + "grad_norm": 157.55052185058594, + "learning_rate": 5.718e-06, + "loss": 35.8344, + "step": 28590 + }, + { + "epoch": 0.05777380947571278, + "grad_norm": 187.2686767578125, + "learning_rate": 5.72e-06, + "loss": 33.3788, + "step": 28600 + }, + { + "epoch": 0.0577940101083966, + "grad_norm": 705.1644897460938, + "learning_rate": 5.722000000000001e-06, + "loss": 26.3549, + "step": 28610 + }, + { + "epoch": 0.05781421074108041, + "grad_norm": 1000.5523071289062, + "learning_rate": 5.724000000000001e-06, + "loss": 35.8475, + "step": 28620 + }, + { + "epoch": 0.05783441137376422, + "grad_norm": 204.24722290039062, + "learning_rate": 5.726000000000001e-06, + "loss": 29.3729, + "step": 28630 + }, + { + "epoch": 0.05785461200644804, + "grad_norm": 374.9010009765625, + "learning_rate": 5.728e-06, + "loss": 30.2926, + "step": 28640 + }, + { + "epoch": 0.057874812639131856, + "grad_norm": 182.42819213867188, + "learning_rate": 5.73e-06, + "loss": 22.1948, + "step": 28650 + }, + { + "epoch": 0.057895013271815676, + "grad_norm": 599.50146484375, + "learning_rate": 5.732000000000001e-06, + "loss": 21.8648, + "step": 28660 + }, + { + "epoch": 0.05791521390449949, + "grad_norm": 717.0021362304688, + "learning_rate": 5.7340000000000005e-06, + "loss": 44.6701, + "step": 28670 + }, + { + "epoch": 0.0579354145371833, + "grad_norm": 186.2231903076172, + "learning_rate": 5.736000000000001e-06, + "loss": 16.5107, + "step": 28680 + }, + { + "epoch": 0.05795561516986712, + "grad_norm": 554.5145263671875, + "learning_rate": 5.738e-06, + "loss": 29.3926, + "step": 28690 + }, + { + "epoch": 0.057975815802550935, + "grad_norm": 155.5208740234375, + "learning_rate": 5.74e-06, + "loss": 56.2564, + "step": 28700 + }, + { + "epoch": 0.05799601643523475, + "grad_norm": 294.67913818359375, + "learning_rate": 5.742000000000001e-06, + "loss": 36.4084, + "step": 28710 + }, + { + "epoch": 0.05801621706791857, + "grad_norm": 433.7984619140625, + "learning_rate": 5.744e-06, + "loss": 34.2891, + "step": 28720 + }, + { + "epoch": 0.05803641770060238, + "grad_norm": 609.890625, + "learning_rate": 5.7460000000000006e-06, + "loss": 23.8575, + "step": 28730 + }, + { + "epoch": 0.0580566183332862, + "grad_norm": 654.7755126953125, + "learning_rate": 5.748e-06, + "loss": 31.2064, + "step": 28740 + }, + { + "epoch": 0.058076818965970015, + "grad_norm": 200.70631408691406, + "learning_rate": 5.75e-06, + "loss": 21.5167, + "step": 28750 + }, + { + "epoch": 0.05809701959865383, + "grad_norm": 600.5113525390625, + "learning_rate": 5.752000000000001e-06, + "loss": 43.6205, + "step": 28760 + }, + { + "epoch": 0.05811722023133765, + "grad_norm": 307.6865539550781, + "learning_rate": 5.754e-06, + "loss": 38.44, + "step": 28770 + }, + { + "epoch": 0.05813742086402146, + "grad_norm": 643.4122924804688, + "learning_rate": 5.7560000000000005e-06, + "loss": 33.5479, + "step": 28780 + }, + { + "epoch": 0.058157621496705274, + "grad_norm": 401.5971984863281, + "learning_rate": 5.758000000000001e-06, + "loss": 27.5044, + "step": 28790 + }, + { + "epoch": 0.058177822129389094, + "grad_norm": 169.2650146484375, + "learning_rate": 5.76e-06, + "loss": 24.2614, + "step": 28800 + }, + { + "epoch": 0.05819802276207291, + "grad_norm": 248.36412048339844, + "learning_rate": 5.762000000000001e-06, + "loss": 42.2762, + "step": 28810 + }, + { + "epoch": 0.05821822339475673, + "grad_norm": 686.0226440429688, + "learning_rate": 5.764000000000001e-06, + "loss": 42.2404, + "step": 28820 + }, + { + "epoch": 0.05823842402744054, + "grad_norm": 59.56367111206055, + "learning_rate": 5.766e-06, + "loss": 34.5825, + "step": 28830 + }, + { + "epoch": 0.05825862466012435, + "grad_norm": 303.09417724609375, + "learning_rate": 5.7680000000000005e-06, + "loss": 28.0605, + "step": 28840 + }, + { + "epoch": 0.05827882529280817, + "grad_norm": 253.83326721191406, + "learning_rate": 5.77e-06, + "loss": 25.3181, + "step": 28850 + }, + { + "epoch": 0.058299025925491986, + "grad_norm": 206.4472198486328, + "learning_rate": 5.772000000000001e-06, + "loss": 20.0452, + "step": 28860 + }, + { + "epoch": 0.0583192265581758, + "grad_norm": 387.855224609375, + "learning_rate": 5.774000000000001e-06, + "loss": 34.5938, + "step": 28870 + }, + { + "epoch": 0.05833942719085962, + "grad_norm": 243.99778747558594, + "learning_rate": 5.776e-06, + "loss": 20.1394, + "step": 28880 + }, + { + "epoch": 0.05835962782354343, + "grad_norm": 798.8829956054688, + "learning_rate": 5.778e-06, + "loss": 28.9229, + "step": 28890 + }, + { + "epoch": 0.05837982845622725, + "grad_norm": 336.54730224609375, + "learning_rate": 5.78e-06, + "loss": 29.0429, + "step": 28900 + }, + { + "epoch": 0.058400029088911065, + "grad_norm": 357.3149108886719, + "learning_rate": 5.782000000000001e-06, + "loss": 40.7843, + "step": 28910 + }, + { + "epoch": 0.05842022972159488, + "grad_norm": 407.9903259277344, + "learning_rate": 5.784000000000001e-06, + "loss": 23.2894, + "step": 28920 + }, + { + "epoch": 0.0584404303542787, + "grad_norm": 243.77044677734375, + "learning_rate": 5.786e-06, + "loss": 22.9751, + "step": 28930 + }, + { + "epoch": 0.05846063098696251, + "grad_norm": 109.91024017333984, + "learning_rate": 5.788e-06, + "loss": 24.473, + "step": 28940 + }, + { + "epoch": 0.058480831619646324, + "grad_norm": 378.3873596191406, + "learning_rate": 5.7900000000000005e-06, + "loss": 52.4523, + "step": 28950 + }, + { + "epoch": 0.058501032252330144, + "grad_norm": 290.75213623046875, + "learning_rate": 5.792000000000001e-06, + "loss": 56.7534, + "step": 28960 + }, + { + "epoch": 0.05852123288501396, + "grad_norm": 239.42730712890625, + "learning_rate": 5.794000000000001e-06, + "loss": 23.7062, + "step": 28970 + }, + { + "epoch": 0.05854143351769778, + "grad_norm": 149.8693389892578, + "learning_rate": 5.796000000000001e-06, + "loss": 38.8801, + "step": 28980 + }, + { + "epoch": 0.05856163415038159, + "grad_norm": 311.30963134765625, + "learning_rate": 5.798e-06, + "loss": 34.3625, + "step": 28990 + }, + { + "epoch": 0.058581834783065403, + "grad_norm": 298.5364685058594, + "learning_rate": 5.8e-06, + "loss": 17.1973, + "step": 29000 + }, + { + "epoch": 0.058602035415749223, + "grad_norm": 242.7294921875, + "learning_rate": 5.802000000000001e-06, + "loss": 18.3321, + "step": 29010 + }, + { + "epoch": 0.05862223604843304, + "grad_norm": 378.9987487792969, + "learning_rate": 5.804000000000001e-06, + "loss": 22.1437, + "step": 29020 + }, + { + "epoch": 0.05864243668111685, + "grad_norm": 178.19488525390625, + "learning_rate": 5.806000000000001e-06, + "loss": 40.1051, + "step": 29030 + }, + { + "epoch": 0.05866263731380067, + "grad_norm": 304.3574523925781, + "learning_rate": 5.808e-06, + "loss": 39.7005, + "step": 29040 + }, + { + "epoch": 0.05868283794648448, + "grad_norm": 572.1640014648438, + "learning_rate": 5.81e-06, + "loss": 36.8209, + "step": 29050 + }, + { + "epoch": 0.0587030385791683, + "grad_norm": 188.74029541015625, + "learning_rate": 5.812000000000001e-06, + "loss": 30.6528, + "step": 29060 + }, + { + "epoch": 0.058723239211852116, + "grad_norm": 159.77755737304688, + "learning_rate": 5.814000000000001e-06, + "loss": 28.0834, + "step": 29070 + }, + { + "epoch": 0.05874343984453593, + "grad_norm": 236.99838256835938, + "learning_rate": 5.816000000000001e-06, + "loss": 24.6492, + "step": 29080 + }, + { + "epoch": 0.05876364047721975, + "grad_norm": 182.14535522460938, + "learning_rate": 5.818e-06, + "loss": 25.6222, + "step": 29090 + }, + { + "epoch": 0.05878384110990356, + "grad_norm": 383.1043701171875, + "learning_rate": 5.82e-06, + "loss": 31.2367, + "step": 29100 + }, + { + "epoch": 0.058804041742587375, + "grad_norm": 423.5813293457031, + "learning_rate": 5.822000000000001e-06, + "loss": 28.9585, + "step": 29110 + }, + { + "epoch": 0.058824242375271195, + "grad_norm": 323.51788330078125, + "learning_rate": 5.8240000000000005e-06, + "loss": 36.8512, + "step": 29120 + }, + { + "epoch": 0.05884444300795501, + "grad_norm": 538.1864624023438, + "learning_rate": 5.826000000000001e-06, + "loss": 31.3003, + "step": 29130 + }, + { + "epoch": 0.05886464364063883, + "grad_norm": 235.02914428710938, + "learning_rate": 5.828e-06, + "loss": 17.9188, + "step": 29140 + }, + { + "epoch": 0.05888484427332264, + "grad_norm": 528.1428833007812, + "learning_rate": 5.83e-06, + "loss": 29.4412, + "step": 29150 + }, + { + "epoch": 0.058905044906006454, + "grad_norm": 203.73753356933594, + "learning_rate": 5.832000000000001e-06, + "loss": 18.9356, + "step": 29160 + }, + { + "epoch": 0.058925245538690274, + "grad_norm": 133.8951416015625, + "learning_rate": 5.834e-06, + "loss": 20.8691, + "step": 29170 + }, + { + "epoch": 0.05894544617137409, + "grad_norm": 357.4326171875, + "learning_rate": 5.8360000000000005e-06, + "loss": 27.035, + "step": 29180 + }, + { + "epoch": 0.0589656468040579, + "grad_norm": 231.9644012451172, + "learning_rate": 5.838000000000001e-06, + "loss": 38.3608, + "step": 29190 + }, + { + "epoch": 0.05898584743674172, + "grad_norm": 697.7938232421875, + "learning_rate": 5.84e-06, + "loss": 23.6989, + "step": 29200 + }, + { + "epoch": 0.05900604806942553, + "grad_norm": 510.7744140625, + "learning_rate": 5.842000000000001e-06, + "loss": 32.0237, + "step": 29210 + }, + { + "epoch": 0.05902624870210935, + "grad_norm": 276.2771301269531, + "learning_rate": 5.844000000000001e-06, + "loss": 22.8579, + "step": 29220 + }, + { + "epoch": 0.059046449334793166, + "grad_norm": 437.34490966796875, + "learning_rate": 5.8460000000000004e-06, + "loss": 35.4377, + "step": 29230 + }, + { + "epoch": 0.05906664996747698, + "grad_norm": 505.7890319824219, + "learning_rate": 5.848000000000001e-06, + "loss": 20.1632, + "step": 29240 + }, + { + "epoch": 0.0590868506001608, + "grad_norm": 473.0169982910156, + "learning_rate": 5.85e-06, + "loss": 17.2294, + "step": 29250 + }, + { + "epoch": 0.05910705123284461, + "grad_norm": 142.6421661376953, + "learning_rate": 5.852000000000001e-06, + "loss": 19.0041, + "step": 29260 + }, + { + "epoch": 0.059127251865528425, + "grad_norm": 243.39337158203125, + "learning_rate": 5.854000000000001e-06, + "loss": 39.0202, + "step": 29270 + }, + { + "epoch": 0.059147452498212245, + "grad_norm": 204.61212158203125, + "learning_rate": 5.856e-06, + "loss": 19.2828, + "step": 29280 + }, + { + "epoch": 0.05916765313089606, + "grad_norm": 277.0043029785156, + "learning_rate": 5.8580000000000005e-06, + "loss": 27.9182, + "step": 29290 + }, + { + "epoch": 0.05918785376357988, + "grad_norm": 244.20718383789062, + "learning_rate": 5.86e-06, + "loss": 17.527, + "step": 29300 + }, + { + "epoch": 0.05920805439626369, + "grad_norm": 139.71414184570312, + "learning_rate": 5.862000000000001e-06, + "loss": 28.5032, + "step": 29310 + }, + { + "epoch": 0.059228255028947505, + "grad_norm": 113.04032135009766, + "learning_rate": 5.864000000000001e-06, + "loss": 28.3933, + "step": 29320 + }, + { + "epoch": 0.059248455661631325, + "grad_norm": 357.9707336425781, + "learning_rate": 5.866e-06, + "loss": 25.2754, + "step": 29330 + }, + { + "epoch": 0.05926865629431514, + "grad_norm": 290.7755126953125, + "learning_rate": 5.868e-06, + "loss": 31.9646, + "step": 29340 + }, + { + "epoch": 0.05928885692699895, + "grad_norm": 403.48736572265625, + "learning_rate": 5.8700000000000005e-06, + "loss": 27.902, + "step": 29350 + }, + { + "epoch": 0.05930905755968277, + "grad_norm": 94.61563873291016, + "learning_rate": 5.872000000000001e-06, + "loss": 41.1359, + "step": 29360 + }, + { + "epoch": 0.059329258192366584, + "grad_norm": 272.755615234375, + "learning_rate": 5.874000000000001e-06, + "loss": 22.9157, + "step": 29370 + }, + { + "epoch": 0.059349458825050404, + "grad_norm": 546.8012084960938, + "learning_rate": 5.876000000000001e-06, + "loss": 36.2041, + "step": 29380 + }, + { + "epoch": 0.05936965945773422, + "grad_norm": 219.43817138671875, + "learning_rate": 5.878e-06, + "loss": 36.0726, + "step": 29390 + }, + { + "epoch": 0.05938986009041803, + "grad_norm": 146.97850036621094, + "learning_rate": 5.8800000000000005e-06, + "loss": 45.6488, + "step": 29400 + }, + { + "epoch": 0.05941006072310185, + "grad_norm": 149.57151794433594, + "learning_rate": 5.882e-06, + "loss": 28.8268, + "step": 29410 + }, + { + "epoch": 0.05943026135578566, + "grad_norm": 210.10450744628906, + "learning_rate": 5.884000000000001e-06, + "loss": 28.2961, + "step": 29420 + }, + { + "epoch": 0.059450461988469476, + "grad_norm": 766.3397827148438, + "learning_rate": 5.886000000000001e-06, + "loss": 29.3491, + "step": 29430 + }, + { + "epoch": 0.059470662621153296, + "grad_norm": 628.2882080078125, + "learning_rate": 5.888e-06, + "loss": 29.5668, + "step": 29440 + }, + { + "epoch": 0.05949086325383711, + "grad_norm": 852.3947143554688, + "learning_rate": 5.89e-06, + "loss": 34.4386, + "step": 29450 + }, + { + "epoch": 0.05951106388652093, + "grad_norm": 351.2292175292969, + "learning_rate": 5.892e-06, + "loss": 34.4647, + "step": 29460 + }, + { + "epoch": 0.05953126451920474, + "grad_norm": 313.0265197753906, + "learning_rate": 5.894000000000001e-06, + "loss": 26.2727, + "step": 29470 + }, + { + "epoch": 0.059551465151888555, + "grad_norm": 422.8077697753906, + "learning_rate": 5.896000000000001e-06, + "loss": 30.2988, + "step": 29480 + }, + { + "epoch": 0.059571665784572375, + "grad_norm": 323.49346923828125, + "learning_rate": 5.898e-06, + "loss": 14.5331, + "step": 29490 + }, + { + "epoch": 0.05959186641725619, + "grad_norm": 206.73532104492188, + "learning_rate": 5.9e-06, + "loss": 34.6049, + "step": 29500 + }, + { + "epoch": 0.05961206704994, + "grad_norm": 245.5485382080078, + "learning_rate": 5.9019999999999996e-06, + "loss": 33.8908, + "step": 29510 + }, + { + "epoch": 0.05963226768262382, + "grad_norm": 329.80767822265625, + "learning_rate": 5.9040000000000006e-06, + "loss": 35.755, + "step": 29520 + }, + { + "epoch": 0.059652468315307634, + "grad_norm": 599.2566528320312, + "learning_rate": 5.906000000000001e-06, + "loss": 37.4361, + "step": 29530 + }, + { + "epoch": 0.059672668947991454, + "grad_norm": 309.95556640625, + "learning_rate": 5.908e-06, + "loss": 29.5198, + "step": 29540 + }, + { + "epoch": 0.05969286958067527, + "grad_norm": 354.0465087890625, + "learning_rate": 5.91e-06, + "loss": 26.3363, + "step": 29550 + }, + { + "epoch": 0.05971307021335908, + "grad_norm": 157.65206909179688, + "learning_rate": 5.912e-06, + "loss": 18.1046, + "step": 29560 + }, + { + "epoch": 0.0597332708460429, + "grad_norm": 135.74368286132812, + "learning_rate": 5.9140000000000005e-06, + "loss": 22.1865, + "step": 29570 + }, + { + "epoch": 0.059753471478726713, + "grad_norm": 618.9219970703125, + "learning_rate": 5.916000000000001e-06, + "loss": 40.3919, + "step": 29580 + }, + { + "epoch": 0.05977367211141053, + "grad_norm": 1112.4710693359375, + "learning_rate": 5.918000000000001e-06, + "loss": 29.0231, + "step": 29590 + }, + { + "epoch": 0.05979387274409435, + "grad_norm": 381.2337951660156, + "learning_rate": 5.92e-06, + "loss": 30.8188, + "step": 29600 + }, + { + "epoch": 0.05981407337677816, + "grad_norm": 331.70794677734375, + "learning_rate": 5.922e-06, + "loss": 26.3022, + "step": 29610 + }, + { + "epoch": 0.05983427400946198, + "grad_norm": 208.14129638671875, + "learning_rate": 5.924000000000001e-06, + "loss": 39.2067, + "step": 29620 + }, + { + "epoch": 0.05985447464214579, + "grad_norm": 378.42877197265625, + "learning_rate": 5.9260000000000005e-06, + "loss": 57.0727, + "step": 29630 + }, + { + "epoch": 0.059874675274829606, + "grad_norm": 139.0298309326172, + "learning_rate": 5.928000000000001e-06, + "loss": 19.168, + "step": 29640 + }, + { + "epoch": 0.059894875907513426, + "grad_norm": 226.4697723388672, + "learning_rate": 5.93e-06, + "loss": 24.9292, + "step": 29650 + }, + { + "epoch": 0.05991507654019724, + "grad_norm": 433.94189453125, + "learning_rate": 5.932e-06, + "loss": 23.7183, + "step": 29660 + }, + { + "epoch": 0.05993527717288105, + "grad_norm": 169.28079223632812, + "learning_rate": 5.934000000000001e-06, + "loss": 32.3608, + "step": 29670 + }, + { + "epoch": 0.05995547780556487, + "grad_norm": 168.9295654296875, + "learning_rate": 5.9360000000000004e-06, + "loss": 25.6014, + "step": 29680 + }, + { + "epoch": 0.059975678438248685, + "grad_norm": 136.11447143554688, + "learning_rate": 5.9380000000000006e-06, + "loss": 42.2129, + "step": 29690 + }, + { + "epoch": 0.059995879070932505, + "grad_norm": 150.8358612060547, + "learning_rate": 5.94e-06, + "loss": 26.7088, + "step": 29700 + }, + { + "epoch": 0.06001607970361632, + "grad_norm": 400.7820739746094, + "learning_rate": 5.942e-06, + "loss": 25.5076, + "step": 29710 + }, + { + "epoch": 0.06003628033630013, + "grad_norm": 749.4679565429688, + "learning_rate": 5.944000000000001e-06, + "loss": 32.5039, + "step": 29720 + }, + { + "epoch": 0.06005648096898395, + "grad_norm": 631.8466796875, + "learning_rate": 5.946e-06, + "loss": 30.8828, + "step": 29730 + }, + { + "epoch": 0.060076681601667764, + "grad_norm": 219.17079162597656, + "learning_rate": 5.9480000000000005e-06, + "loss": 37.6815, + "step": 29740 + }, + { + "epoch": 0.06009688223435158, + "grad_norm": 386.05975341796875, + "learning_rate": 5.950000000000001e-06, + "loss": 32.4698, + "step": 29750 + }, + { + "epoch": 0.0601170828670354, + "grad_norm": 212.24620056152344, + "learning_rate": 5.952e-06, + "loss": 20.2035, + "step": 29760 + }, + { + "epoch": 0.06013728349971921, + "grad_norm": 238.9572296142578, + "learning_rate": 5.954000000000001e-06, + "loss": 13.4167, + "step": 29770 + }, + { + "epoch": 0.06015748413240303, + "grad_norm": 165.2470703125, + "learning_rate": 5.956000000000001e-06, + "loss": 33.2291, + "step": 29780 + }, + { + "epoch": 0.06017768476508684, + "grad_norm": 300.0216064453125, + "learning_rate": 5.958e-06, + "loss": 36.237, + "step": 29790 + }, + { + "epoch": 0.060197885397770656, + "grad_norm": 821.6015014648438, + "learning_rate": 5.9600000000000005e-06, + "loss": 30.3512, + "step": 29800 + }, + { + "epoch": 0.060218086030454476, + "grad_norm": 2150.255615234375, + "learning_rate": 5.962e-06, + "loss": 36.7762, + "step": 29810 + }, + { + "epoch": 0.06023828666313829, + "grad_norm": 508.111572265625, + "learning_rate": 5.964000000000001e-06, + "loss": 18.1586, + "step": 29820 + }, + { + "epoch": 0.0602584872958221, + "grad_norm": 1313.7850341796875, + "learning_rate": 5.966000000000001e-06, + "loss": 43.5832, + "step": 29830 + }, + { + "epoch": 0.06027868792850592, + "grad_norm": 229.95068359375, + "learning_rate": 5.968e-06, + "loss": 19.3392, + "step": 29840 + }, + { + "epoch": 0.060298888561189735, + "grad_norm": 296.32421875, + "learning_rate": 5.9700000000000004e-06, + "loss": 13.511, + "step": 29850 + }, + { + "epoch": 0.060319089193873555, + "grad_norm": 491.4680480957031, + "learning_rate": 5.972e-06, + "loss": 50.0978, + "step": 29860 + }, + { + "epoch": 0.06033928982655737, + "grad_norm": 830.8683471679688, + "learning_rate": 5.974000000000001e-06, + "loss": 39.9874, + "step": 29870 + }, + { + "epoch": 0.06035949045924118, + "grad_norm": 277.2486877441406, + "learning_rate": 5.976000000000001e-06, + "loss": 19.6756, + "step": 29880 + }, + { + "epoch": 0.060379691091925, + "grad_norm": 227.3853302001953, + "learning_rate": 5.978e-06, + "loss": 24.5435, + "step": 29890 + }, + { + "epoch": 0.060399891724608815, + "grad_norm": 396.1295166015625, + "learning_rate": 5.98e-06, + "loss": 34.9831, + "step": 29900 + }, + { + "epoch": 0.06042009235729263, + "grad_norm": 119.34272003173828, + "learning_rate": 5.982e-06, + "loss": 17.9223, + "step": 29910 + }, + { + "epoch": 0.06044029298997645, + "grad_norm": 179.78720092773438, + "learning_rate": 5.984000000000001e-06, + "loss": 19.886, + "step": 29920 + }, + { + "epoch": 0.06046049362266026, + "grad_norm": 241.3793487548828, + "learning_rate": 5.986000000000001e-06, + "loss": 25.6962, + "step": 29930 + }, + { + "epoch": 0.06048069425534408, + "grad_norm": 481.5415954589844, + "learning_rate": 5.988e-06, + "loss": 28.2497, + "step": 29940 + }, + { + "epoch": 0.060500894888027894, + "grad_norm": 267.5933837890625, + "learning_rate": 5.99e-06, + "loss": 36.0274, + "step": 29950 + }, + { + "epoch": 0.06052109552071171, + "grad_norm": 139.83847045898438, + "learning_rate": 5.992e-06, + "loss": 23.3543, + "step": 29960 + }, + { + "epoch": 0.06054129615339553, + "grad_norm": 136.5538787841797, + "learning_rate": 5.9940000000000005e-06, + "loss": 26.1079, + "step": 29970 + }, + { + "epoch": 0.06056149678607934, + "grad_norm": 284.79254150390625, + "learning_rate": 5.996000000000001e-06, + "loss": 21.3011, + "step": 29980 + }, + { + "epoch": 0.06058169741876315, + "grad_norm": 660.3438720703125, + "learning_rate": 5.998000000000001e-06, + "loss": 29.8056, + "step": 29990 + }, + { + "epoch": 0.06060189805144697, + "grad_norm": 293.27874755859375, + "learning_rate": 6e-06, + "loss": 32.3305, + "step": 30000 + }, + { + "epoch": 0.060622098684130786, + "grad_norm": 114.87731170654297, + "learning_rate": 6.002e-06, + "loss": 31.125, + "step": 30010 + }, + { + "epoch": 0.060642299316814606, + "grad_norm": 225.98101806640625, + "learning_rate": 6.004000000000001e-06, + "loss": 29.0658, + "step": 30020 + }, + { + "epoch": 0.06066249994949842, + "grad_norm": 150.27601623535156, + "learning_rate": 6.006000000000001e-06, + "loss": 25.6947, + "step": 30030 + }, + { + "epoch": 0.06068270058218223, + "grad_norm": 497.3393249511719, + "learning_rate": 6.008000000000001e-06, + "loss": 27.0551, + "step": 30040 + }, + { + "epoch": 0.06070290121486605, + "grad_norm": 457.775146484375, + "learning_rate": 6.01e-06, + "loss": 27.2113, + "step": 30050 + }, + { + "epoch": 0.060723101847549865, + "grad_norm": 251.35533142089844, + "learning_rate": 6.012e-06, + "loss": 35.466, + "step": 30060 + }, + { + "epoch": 0.06074330248023368, + "grad_norm": 185.10580444335938, + "learning_rate": 6.014000000000001e-06, + "loss": 26.3948, + "step": 30070 + }, + { + "epoch": 0.0607635031129175, + "grad_norm": 156.08596801757812, + "learning_rate": 6.0160000000000005e-06, + "loss": 24.4032, + "step": 30080 + }, + { + "epoch": 0.06078370374560131, + "grad_norm": 404.8869934082031, + "learning_rate": 6.018000000000001e-06, + "loss": 28.049, + "step": 30090 + }, + { + "epoch": 0.06080390437828513, + "grad_norm": 307.8752136230469, + "learning_rate": 6.02e-06, + "loss": 31.1489, + "step": 30100 + }, + { + "epoch": 0.060824105010968944, + "grad_norm": 751.0390625, + "learning_rate": 6.022e-06, + "loss": 32.456, + "step": 30110 + }, + { + "epoch": 0.06084430564365276, + "grad_norm": 133.58175659179688, + "learning_rate": 6.024000000000001e-06, + "loss": 19.7639, + "step": 30120 + }, + { + "epoch": 0.06086450627633658, + "grad_norm": 276.15557861328125, + "learning_rate": 6.026e-06, + "loss": 44.7161, + "step": 30130 + }, + { + "epoch": 0.06088470690902039, + "grad_norm": 220.90367126464844, + "learning_rate": 6.0280000000000006e-06, + "loss": 25.6417, + "step": 30140 + }, + { + "epoch": 0.060904907541704204, + "grad_norm": 304.4405822753906, + "learning_rate": 6.030000000000001e-06, + "loss": 35.9571, + "step": 30150 + }, + { + "epoch": 0.060925108174388024, + "grad_norm": 359.6705627441406, + "learning_rate": 6.032e-06, + "loss": 39.6983, + "step": 30160 + }, + { + "epoch": 0.06094530880707184, + "grad_norm": 237.6664581298828, + "learning_rate": 6.034000000000001e-06, + "loss": 22.8935, + "step": 30170 + }, + { + "epoch": 0.06096550943975566, + "grad_norm": 879.8140869140625, + "learning_rate": 6.036000000000001e-06, + "loss": 41.0201, + "step": 30180 + }, + { + "epoch": 0.06098571007243947, + "grad_norm": 454.46734619140625, + "learning_rate": 6.0380000000000005e-06, + "loss": 27.0023, + "step": 30190 + }, + { + "epoch": 0.06100591070512328, + "grad_norm": 564.1406860351562, + "learning_rate": 6.040000000000001e-06, + "loss": 40.1812, + "step": 30200 + }, + { + "epoch": 0.0610261113378071, + "grad_norm": 5.1078667640686035, + "learning_rate": 6.042e-06, + "loss": 24.9429, + "step": 30210 + }, + { + "epoch": 0.061046311970490916, + "grad_norm": 154.20806884765625, + "learning_rate": 6.044000000000001e-06, + "loss": 18.952, + "step": 30220 + }, + { + "epoch": 0.06106651260317473, + "grad_norm": 340.8790588378906, + "learning_rate": 6.046000000000001e-06, + "loss": 34.2021, + "step": 30230 + }, + { + "epoch": 0.06108671323585855, + "grad_norm": 172.2555389404297, + "learning_rate": 6.048e-06, + "loss": 30.5673, + "step": 30240 + }, + { + "epoch": 0.06110691386854236, + "grad_norm": 318.4809265136719, + "learning_rate": 6.0500000000000005e-06, + "loss": 13.6786, + "step": 30250 + }, + { + "epoch": 0.06112711450122618, + "grad_norm": 360.1896667480469, + "learning_rate": 6.052e-06, + "loss": 24.7746, + "step": 30260 + }, + { + "epoch": 0.061147315133909995, + "grad_norm": 319.43060302734375, + "learning_rate": 6.054000000000001e-06, + "loss": 41.7367, + "step": 30270 + }, + { + "epoch": 0.06116751576659381, + "grad_norm": 565.7113647460938, + "learning_rate": 6.056000000000001e-06, + "loss": 26.4553, + "step": 30280 + }, + { + "epoch": 0.06118771639927763, + "grad_norm": 480.1114501953125, + "learning_rate": 6.058e-06, + "loss": 20.8778, + "step": 30290 + }, + { + "epoch": 0.06120791703196144, + "grad_norm": 239.09214782714844, + "learning_rate": 6.0600000000000004e-06, + "loss": 41.7371, + "step": 30300 + }, + { + "epoch": 0.061228117664645254, + "grad_norm": 453.1701965332031, + "learning_rate": 6.062e-06, + "loss": 52.8978, + "step": 30310 + }, + { + "epoch": 0.061248318297329074, + "grad_norm": 503.0354309082031, + "learning_rate": 6.064000000000001e-06, + "loss": 31.5805, + "step": 30320 + }, + { + "epoch": 0.06126851893001289, + "grad_norm": 299.6436767578125, + "learning_rate": 6.066000000000001e-06, + "loss": 18.5376, + "step": 30330 + }, + { + "epoch": 0.06128871956269671, + "grad_norm": 326.1717224121094, + "learning_rate": 6.068e-06, + "loss": 39.5791, + "step": 30340 + }, + { + "epoch": 0.06130892019538052, + "grad_norm": 107.300048828125, + "learning_rate": 6.07e-06, + "loss": 26.4288, + "step": 30350 + }, + { + "epoch": 0.06132912082806433, + "grad_norm": 548.6287841796875, + "learning_rate": 6.0720000000000005e-06, + "loss": 27.0214, + "step": 30360 + }, + { + "epoch": 0.06134932146074815, + "grad_norm": 221.74899291992188, + "learning_rate": 6.074000000000001e-06, + "loss": 33.1325, + "step": 30370 + }, + { + "epoch": 0.061369522093431966, + "grad_norm": 245.58372497558594, + "learning_rate": 6.076000000000001e-06, + "loss": 23.2985, + "step": 30380 + }, + { + "epoch": 0.06138972272611578, + "grad_norm": 458.6068115234375, + "learning_rate": 6.078000000000001e-06, + "loss": 32.0391, + "step": 30390 + }, + { + "epoch": 0.0614099233587996, + "grad_norm": 251.84043884277344, + "learning_rate": 6.08e-06, + "loss": 39.0284, + "step": 30400 + }, + { + "epoch": 0.06143012399148341, + "grad_norm": 311.1361389160156, + "learning_rate": 6.082e-06, + "loss": 28.016, + "step": 30410 + }, + { + "epoch": 0.061450324624167225, + "grad_norm": 208.8995361328125, + "learning_rate": 6.084000000000001e-06, + "loss": 38.4094, + "step": 30420 + }, + { + "epoch": 0.061470525256851045, + "grad_norm": 204.97857666015625, + "learning_rate": 6.086000000000001e-06, + "loss": 29.982, + "step": 30430 + }, + { + "epoch": 0.06149072588953486, + "grad_norm": 351.9819030761719, + "learning_rate": 6.088000000000001e-06, + "loss": 43.009, + "step": 30440 + }, + { + "epoch": 0.06151092652221868, + "grad_norm": 0.0, + "learning_rate": 6.09e-06, + "loss": 16.1789, + "step": 30450 + }, + { + "epoch": 0.06153112715490249, + "grad_norm": 102.66375732421875, + "learning_rate": 6.092e-06, + "loss": 15.9975, + "step": 30460 + }, + { + "epoch": 0.061551327787586305, + "grad_norm": 301.2502136230469, + "learning_rate": 6.094000000000001e-06, + "loss": 24.2382, + "step": 30470 + }, + { + "epoch": 0.061571528420270125, + "grad_norm": 441.5412902832031, + "learning_rate": 6.096000000000001e-06, + "loss": 22.9086, + "step": 30480 + }, + { + "epoch": 0.06159172905295394, + "grad_norm": 298.14483642578125, + "learning_rate": 6.098000000000001e-06, + "loss": 42.6522, + "step": 30490 + }, + { + "epoch": 0.06161192968563775, + "grad_norm": 222.50039672851562, + "learning_rate": 6.1e-06, + "loss": 34.7548, + "step": 30500 + }, + { + "epoch": 0.06163213031832157, + "grad_norm": 199.57862854003906, + "learning_rate": 6.102e-06, + "loss": 16.1869, + "step": 30510 + }, + { + "epoch": 0.061652330951005384, + "grad_norm": 115.16866302490234, + "learning_rate": 6.104000000000001e-06, + "loss": 21.8251, + "step": 30520 + }, + { + "epoch": 0.061672531583689204, + "grad_norm": 238.4964599609375, + "learning_rate": 6.1060000000000005e-06, + "loss": 32.4652, + "step": 30530 + }, + { + "epoch": 0.06169273221637302, + "grad_norm": 505.0570068359375, + "learning_rate": 6.108000000000001e-06, + "loss": 44.994, + "step": 30540 + }, + { + "epoch": 0.06171293284905683, + "grad_norm": 402.1174011230469, + "learning_rate": 6.110000000000001e-06, + "loss": 25.0367, + "step": 30550 + }, + { + "epoch": 0.06173313348174065, + "grad_norm": 77.92440795898438, + "learning_rate": 6.112e-06, + "loss": 24.3771, + "step": 30560 + }, + { + "epoch": 0.06175333411442446, + "grad_norm": 43.44609832763672, + "learning_rate": 6.114000000000001e-06, + "loss": 16.8635, + "step": 30570 + }, + { + "epoch": 0.061773534747108276, + "grad_norm": 104.65845489501953, + "learning_rate": 6.116000000000001e-06, + "loss": 25.8675, + "step": 30580 + }, + { + "epoch": 0.061793735379792096, + "grad_norm": 294.6768493652344, + "learning_rate": 6.1180000000000005e-06, + "loss": 36.6797, + "step": 30590 + }, + { + "epoch": 0.06181393601247591, + "grad_norm": 674.5152587890625, + "learning_rate": 6.120000000000001e-06, + "loss": 22.9528, + "step": 30600 + }, + { + "epoch": 0.06183413664515973, + "grad_norm": 177.4771728515625, + "learning_rate": 6.122e-06, + "loss": 31.4151, + "step": 30610 + }, + { + "epoch": 0.06185433727784354, + "grad_norm": 151.2828369140625, + "learning_rate": 6.124000000000001e-06, + "loss": 33.3632, + "step": 30620 + }, + { + "epoch": 0.061874537910527355, + "grad_norm": 82.88737487792969, + "learning_rate": 6.126000000000001e-06, + "loss": 25.3052, + "step": 30630 + }, + { + "epoch": 0.061894738543211175, + "grad_norm": 496.4389343261719, + "learning_rate": 6.1280000000000005e-06, + "loss": 31.512, + "step": 30640 + }, + { + "epoch": 0.06191493917589499, + "grad_norm": 372.21466064453125, + "learning_rate": 6.130000000000001e-06, + "loss": 18.1448, + "step": 30650 + }, + { + "epoch": 0.0619351398085788, + "grad_norm": 576.8009033203125, + "learning_rate": 6.132e-06, + "loss": 23.0323, + "step": 30660 + }, + { + "epoch": 0.06195534044126262, + "grad_norm": 256.25958251953125, + "learning_rate": 6.134e-06, + "loss": 20.0009, + "step": 30670 + }, + { + "epoch": 0.061975541073946434, + "grad_norm": 240.95333862304688, + "learning_rate": 6.136000000000001e-06, + "loss": 53.408, + "step": 30680 + }, + { + "epoch": 0.061995741706630254, + "grad_norm": 934.2208862304688, + "learning_rate": 6.138e-06, + "loss": 28.7069, + "step": 30690 + }, + { + "epoch": 0.06201594233931407, + "grad_norm": 139.9613494873047, + "learning_rate": 6.1400000000000005e-06, + "loss": 28.2148, + "step": 30700 + }, + { + "epoch": 0.06203614297199788, + "grad_norm": 492.6600646972656, + "learning_rate": 6.142e-06, + "loss": 28.5997, + "step": 30710 + }, + { + "epoch": 0.0620563436046817, + "grad_norm": 224.6328582763672, + "learning_rate": 6.144e-06, + "loss": 16.2472, + "step": 30720 + }, + { + "epoch": 0.062076544237365514, + "grad_norm": 581.1234130859375, + "learning_rate": 6.146000000000001e-06, + "loss": 20.6001, + "step": 30730 + }, + { + "epoch": 0.06209674487004933, + "grad_norm": 419.7743835449219, + "learning_rate": 6.148e-06, + "loss": 30.4461, + "step": 30740 + }, + { + "epoch": 0.06211694550273315, + "grad_norm": 193.66250610351562, + "learning_rate": 6.15e-06, + "loss": 28.1716, + "step": 30750 + }, + { + "epoch": 0.06213714613541696, + "grad_norm": 113.6976318359375, + "learning_rate": 6.1520000000000006e-06, + "loss": 20.0639, + "step": 30760 + }, + { + "epoch": 0.06215734676810078, + "grad_norm": 338.61114501953125, + "learning_rate": 6.154e-06, + "loss": 34.2324, + "step": 30770 + }, + { + "epoch": 0.06217754740078459, + "grad_norm": 346.8319396972656, + "learning_rate": 6.156000000000001e-06, + "loss": 25.6069, + "step": 30780 + }, + { + "epoch": 0.062197748033468406, + "grad_norm": 170.17576599121094, + "learning_rate": 6.158000000000001e-06, + "loss": 37.54, + "step": 30790 + }, + { + "epoch": 0.062217948666152226, + "grad_norm": 621.9466552734375, + "learning_rate": 6.16e-06, + "loss": 30.7512, + "step": 30800 + }, + { + "epoch": 0.06223814929883604, + "grad_norm": 348.8072204589844, + "learning_rate": 6.1620000000000005e-06, + "loss": 31.0073, + "step": 30810 + }, + { + "epoch": 0.06225834993151985, + "grad_norm": 85.53361511230469, + "learning_rate": 6.164e-06, + "loss": 38.4568, + "step": 30820 + }, + { + "epoch": 0.06227855056420367, + "grad_norm": 825.8348999023438, + "learning_rate": 6.166000000000001e-06, + "loss": 33.1891, + "step": 30830 + }, + { + "epoch": 0.062298751196887485, + "grad_norm": 377.9906921386719, + "learning_rate": 6.168000000000001e-06, + "loss": 21.0851, + "step": 30840 + }, + { + "epoch": 0.062318951829571305, + "grad_norm": 111.96500396728516, + "learning_rate": 6.17e-06, + "loss": 22.7266, + "step": 30850 + }, + { + "epoch": 0.06233915246225512, + "grad_norm": 224.50840759277344, + "learning_rate": 6.172e-06, + "loss": 20.1645, + "step": 30860 + }, + { + "epoch": 0.06235935309493893, + "grad_norm": 263.6773986816406, + "learning_rate": 6.174e-06, + "loss": 22.9404, + "step": 30870 + }, + { + "epoch": 0.06237955372762275, + "grad_norm": 698.050537109375, + "learning_rate": 6.176000000000001e-06, + "loss": 59.9724, + "step": 30880 + }, + { + "epoch": 0.062399754360306564, + "grad_norm": 265.96722412109375, + "learning_rate": 6.178000000000001e-06, + "loss": 16.1057, + "step": 30890 + }, + { + "epoch": 0.06241995499299038, + "grad_norm": 248.72235107421875, + "learning_rate": 6.18e-06, + "loss": 36.1049, + "step": 30900 + }, + { + "epoch": 0.0624401556256742, + "grad_norm": 650.3839721679688, + "learning_rate": 6.182e-06, + "loss": 26.4691, + "step": 30910 + }, + { + "epoch": 0.06246035625835801, + "grad_norm": 492.22845458984375, + "learning_rate": 6.184e-06, + "loss": 15.0181, + "step": 30920 + }, + { + "epoch": 0.06248055689104183, + "grad_norm": 378.4203186035156, + "learning_rate": 6.1860000000000006e-06, + "loss": 36.7387, + "step": 30930 + }, + { + "epoch": 0.06250075752372564, + "grad_norm": 249.523681640625, + "learning_rate": 6.188000000000001e-06, + "loss": 28.3755, + "step": 30940 + }, + { + "epoch": 0.06252095815640946, + "grad_norm": 113.86726379394531, + "learning_rate": 6.190000000000001e-06, + "loss": 30.0943, + "step": 30950 + }, + { + "epoch": 0.06254115878909328, + "grad_norm": 415.4285888671875, + "learning_rate": 6.192e-06, + "loss": 20.979, + "step": 30960 + }, + { + "epoch": 0.0625613594217771, + "grad_norm": 153.79115295410156, + "learning_rate": 6.194e-06, + "loss": 34.6167, + "step": 30970 + }, + { + "epoch": 0.0625815600544609, + "grad_norm": 390.0386047363281, + "learning_rate": 6.196000000000001e-06, + "loss": 37.6308, + "step": 30980 + }, + { + "epoch": 0.06260176068714472, + "grad_norm": 786.6076049804688, + "learning_rate": 6.198000000000001e-06, + "loss": 19.3254, + "step": 30990 + }, + { + "epoch": 0.06262196131982854, + "grad_norm": 593.9253540039062, + "learning_rate": 6.200000000000001e-06, + "loss": 45.8376, + "step": 31000 + }, + { + "epoch": 0.06264216195251235, + "grad_norm": 267.5792236328125, + "learning_rate": 6.202e-06, + "loss": 19.2077, + "step": 31010 + }, + { + "epoch": 0.06266236258519617, + "grad_norm": 1246.27587890625, + "learning_rate": 6.204e-06, + "loss": 29.5188, + "step": 31020 + }, + { + "epoch": 0.06268256321787999, + "grad_norm": 200.8495330810547, + "learning_rate": 6.206000000000001e-06, + "loss": 18.708, + "step": 31030 + }, + { + "epoch": 0.0627027638505638, + "grad_norm": 508.18182373046875, + "learning_rate": 6.2080000000000005e-06, + "loss": 31.8805, + "step": 31040 + }, + { + "epoch": 0.06272296448324761, + "grad_norm": 369.0675048828125, + "learning_rate": 6.210000000000001e-06, + "loss": 25.7357, + "step": 31050 + }, + { + "epoch": 0.06274316511593143, + "grad_norm": 474.58843994140625, + "learning_rate": 6.212e-06, + "loss": 38.1484, + "step": 31060 + }, + { + "epoch": 0.06276336574861524, + "grad_norm": 140.6695556640625, + "learning_rate": 6.214e-06, + "loss": 26.6929, + "step": 31070 + }, + { + "epoch": 0.06278356638129906, + "grad_norm": 300.88787841796875, + "learning_rate": 6.216000000000001e-06, + "loss": 17.8964, + "step": 31080 + }, + { + "epoch": 0.06280376701398288, + "grad_norm": 99.71508026123047, + "learning_rate": 6.2180000000000004e-06, + "loss": 24.024, + "step": 31090 + }, + { + "epoch": 0.06282396764666669, + "grad_norm": 702.80908203125, + "learning_rate": 6.220000000000001e-06, + "loss": 32.6589, + "step": 31100 + }, + { + "epoch": 0.0628441682793505, + "grad_norm": 196.9779510498047, + "learning_rate": 6.222e-06, + "loss": 31.1702, + "step": 31110 + }, + { + "epoch": 0.06286436891203433, + "grad_norm": 408.7449035644531, + "learning_rate": 6.224e-06, + "loss": 35.4531, + "step": 31120 + }, + { + "epoch": 0.06288456954471815, + "grad_norm": 805.6083984375, + "learning_rate": 6.226000000000001e-06, + "loss": 47.5391, + "step": 31130 + }, + { + "epoch": 0.06290477017740195, + "grad_norm": 193.94830322265625, + "learning_rate": 6.228e-06, + "loss": 21.1073, + "step": 31140 + }, + { + "epoch": 0.06292497081008577, + "grad_norm": 92.54554748535156, + "learning_rate": 6.2300000000000005e-06, + "loss": 20.5223, + "step": 31150 + }, + { + "epoch": 0.06294517144276959, + "grad_norm": 220.41717529296875, + "learning_rate": 6.232000000000001e-06, + "loss": 23.0221, + "step": 31160 + }, + { + "epoch": 0.0629653720754534, + "grad_norm": 487.35113525390625, + "learning_rate": 6.234e-06, + "loss": 46.811, + "step": 31170 + }, + { + "epoch": 0.06298557270813722, + "grad_norm": 244.32127380371094, + "learning_rate": 6.236000000000001e-06, + "loss": 38.5928, + "step": 31180 + }, + { + "epoch": 0.06300577334082104, + "grad_norm": 713.2606201171875, + "learning_rate": 6.238000000000001e-06, + "loss": 34.265, + "step": 31190 + }, + { + "epoch": 0.06302597397350485, + "grad_norm": 602.4276123046875, + "learning_rate": 6.24e-06, + "loss": 44.2316, + "step": 31200 + }, + { + "epoch": 0.06304617460618867, + "grad_norm": 248.2340850830078, + "learning_rate": 6.2420000000000005e-06, + "loss": 15.4166, + "step": 31210 + }, + { + "epoch": 0.06306637523887249, + "grad_norm": 357.87896728515625, + "learning_rate": 6.244e-06, + "loss": 38.5904, + "step": 31220 + }, + { + "epoch": 0.06308657587155629, + "grad_norm": 204.51394653320312, + "learning_rate": 6.246000000000001e-06, + "loss": 28.7903, + "step": 31230 + }, + { + "epoch": 0.06310677650424011, + "grad_norm": 668.6724243164062, + "learning_rate": 6.248000000000001e-06, + "loss": 30.186, + "step": 31240 + }, + { + "epoch": 0.06312697713692393, + "grad_norm": 209.66392517089844, + "learning_rate": 6.25e-06, + "loss": 30.3958, + "step": 31250 + }, + { + "epoch": 0.06314717776960774, + "grad_norm": 170.11233520507812, + "learning_rate": 6.2520000000000004e-06, + "loss": 27.4409, + "step": 31260 + }, + { + "epoch": 0.06316737840229156, + "grad_norm": 0.0, + "learning_rate": 6.254e-06, + "loss": 35.9831, + "step": 31270 + }, + { + "epoch": 0.06318757903497538, + "grad_norm": 345.4881591796875, + "learning_rate": 6.256000000000001e-06, + "loss": 18.772, + "step": 31280 + }, + { + "epoch": 0.0632077796676592, + "grad_norm": 433.49755859375, + "learning_rate": 6.258000000000001e-06, + "loss": 32.9236, + "step": 31290 + }, + { + "epoch": 0.063227980300343, + "grad_norm": 326.5909729003906, + "learning_rate": 6.26e-06, + "loss": 20.2967, + "step": 31300 + }, + { + "epoch": 0.06324818093302682, + "grad_norm": 27.680173873901367, + "learning_rate": 6.262e-06, + "loss": 33.7101, + "step": 31310 + }, + { + "epoch": 0.06326838156571064, + "grad_norm": 391.7144470214844, + "learning_rate": 6.264e-06, + "loss": 41.8141, + "step": 31320 + }, + { + "epoch": 0.06328858219839445, + "grad_norm": 102.66294860839844, + "learning_rate": 6.266000000000001e-06, + "loss": 27.3469, + "step": 31330 + }, + { + "epoch": 0.06330878283107827, + "grad_norm": 245.99940490722656, + "learning_rate": 6.268000000000001e-06, + "loss": 21.1016, + "step": 31340 + }, + { + "epoch": 0.06332898346376209, + "grad_norm": 250.53732299804688, + "learning_rate": 6.27e-06, + "loss": 43.6522, + "step": 31350 + }, + { + "epoch": 0.0633491840964459, + "grad_norm": 357.2115783691406, + "learning_rate": 6.272e-06, + "loss": 25.7863, + "step": 31360 + }, + { + "epoch": 0.06336938472912972, + "grad_norm": 429.6044006347656, + "learning_rate": 6.274e-06, + "loss": 28.0361, + "step": 31370 + }, + { + "epoch": 0.06338958536181354, + "grad_norm": 306.50421142578125, + "learning_rate": 6.2760000000000006e-06, + "loss": 33.9684, + "step": 31380 + }, + { + "epoch": 0.06340978599449734, + "grad_norm": 193.95425415039062, + "learning_rate": 6.278000000000001e-06, + "loss": 34.4685, + "step": 31390 + }, + { + "epoch": 0.06342998662718116, + "grad_norm": 387.4499816894531, + "learning_rate": 6.280000000000001e-06, + "loss": 23.8366, + "step": 31400 + }, + { + "epoch": 0.06345018725986498, + "grad_norm": 345.3521728515625, + "learning_rate": 6.282e-06, + "loss": 27.6357, + "step": 31410 + }, + { + "epoch": 0.06347038789254879, + "grad_norm": 182.93849182128906, + "learning_rate": 6.284e-06, + "loss": 22.5396, + "step": 31420 + }, + { + "epoch": 0.06349058852523261, + "grad_norm": 446.3598937988281, + "learning_rate": 6.286000000000001e-06, + "loss": 29.8193, + "step": 31430 + }, + { + "epoch": 0.06351078915791643, + "grad_norm": 258.4228515625, + "learning_rate": 6.288000000000001e-06, + "loss": 40.5369, + "step": 31440 + }, + { + "epoch": 0.06353098979060025, + "grad_norm": 193.42092895507812, + "learning_rate": 6.290000000000001e-06, + "loss": 24.5259, + "step": 31450 + }, + { + "epoch": 0.06355119042328405, + "grad_norm": 681.1078491210938, + "learning_rate": 6.292e-06, + "loss": 26.9826, + "step": 31460 + }, + { + "epoch": 0.06357139105596787, + "grad_norm": 435.4959716796875, + "learning_rate": 6.294e-06, + "loss": 39.3159, + "step": 31470 + }, + { + "epoch": 0.0635915916886517, + "grad_norm": 251.05712890625, + "learning_rate": 6.296000000000001e-06, + "loss": 29.1976, + "step": 31480 + }, + { + "epoch": 0.0636117923213355, + "grad_norm": 446.5157165527344, + "learning_rate": 6.2980000000000005e-06, + "loss": 32.6056, + "step": 31490 + }, + { + "epoch": 0.06363199295401932, + "grad_norm": 611.6644287109375, + "learning_rate": 6.300000000000001e-06, + "loss": 31.2819, + "step": 31500 + }, + { + "epoch": 0.06365219358670314, + "grad_norm": 117.74989318847656, + "learning_rate": 6.302e-06, + "loss": 30.5737, + "step": 31510 + }, + { + "epoch": 0.06367239421938695, + "grad_norm": 289.89520263671875, + "learning_rate": 6.304e-06, + "loss": 18.713, + "step": 31520 + }, + { + "epoch": 0.06369259485207077, + "grad_norm": 272.0415954589844, + "learning_rate": 6.306000000000001e-06, + "loss": 37.7071, + "step": 31530 + }, + { + "epoch": 0.06371279548475459, + "grad_norm": 110.60374450683594, + "learning_rate": 6.308e-06, + "loss": 18.2154, + "step": 31540 + }, + { + "epoch": 0.06373299611743839, + "grad_norm": 244.5205841064453, + "learning_rate": 6.3100000000000006e-06, + "loss": 26.8455, + "step": 31550 + }, + { + "epoch": 0.06375319675012221, + "grad_norm": 417.72833251953125, + "learning_rate": 6.312000000000001e-06, + "loss": 33.2194, + "step": 31560 + }, + { + "epoch": 0.06377339738280603, + "grad_norm": 368.2216796875, + "learning_rate": 6.314e-06, + "loss": 14.8553, + "step": 31570 + }, + { + "epoch": 0.06379359801548984, + "grad_norm": 584.34375, + "learning_rate": 6.316000000000001e-06, + "loss": 36.6712, + "step": 31580 + }, + { + "epoch": 0.06381379864817366, + "grad_norm": 198.16441345214844, + "learning_rate": 6.318000000000001e-06, + "loss": 19.6327, + "step": 31590 + }, + { + "epoch": 0.06383399928085748, + "grad_norm": 81.60736846923828, + "learning_rate": 6.3200000000000005e-06, + "loss": 43.3337, + "step": 31600 + }, + { + "epoch": 0.0638541999135413, + "grad_norm": 449.2483215332031, + "learning_rate": 6.322000000000001e-06, + "loss": 32.1987, + "step": 31610 + }, + { + "epoch": 0.0638744005462251, + "grad_norm": 416.2035217285156, + "learning_rate": 6.324e-06, + "loss": 16.5998, + "step": 31620 + }, + { + "epoch": 0.06389460117890892, + "grad_norm": 381.83648681640625, + "learning_rate": 6.326000000000001e-06, + "loss": 26.0645, + "step": 31630 + }, + { + "epoch": 0.06391480181159274, + "grad_norm": 440.89459228515625, + "learning_rate": 6.328000000000001e-06, + "loss": 17.4892, + "step": 31640 + }, + { + "epoch": 0.06393500244427655, + "grad_norm": 371.1136169433594, + "learning_rate": 6.33e-06, + "loss": 24.7386, + "step": 31650 + }, + { + "epoch": 0.06395520307696037, + "grad_norm": 351.368896484375, + "learning_rate": 6.3320000000000005e-06, + "loss": 38.4869, + "step": 31660 + }, + { + "epoch": 0.06397540370964419, + "grad_norm": 278.1321716308594, + "learning_rate": 6.334e-06, + "loss": 6.4429, + "step": 31670 + }, + { + "epoch": 0.063995604342328, + "grad_norm": 1031.544677734375, + "learning_rate": 6.336000000000001e-06, + "loss": 36.5749, + "step": 31680 + }, + { + "epoch": 0.06401580497501182, + "grad_norm": 260.3399658203125, + "learning_rate": 6.338000000000001e-06, + "loss": 28.7344, + "step": 31690 + }, + { + "epoch": 0.06403600560769564, + "grad_norm": 286.3230285644531, + "learning_rate": 6.34e-06, + "loss": 23.257, + "step": 31700 + }, + { + "epoch": 0.06405620624037944, + "grad_norm": 395.49169921875, + "learning_rate": 6.3420000000000004e-06, + "loss": 47.73, + "step": 31710 + }, + { + "epoch": 0.06407640687306326, + "grad_norm": 172.02305603027344, + "learning_rate": 6.344e-06, + "loss": 25.7619, + "step": 31720 + }, + { + "epoch": 0.06409660750574708, + "grad_norm": 28.446199417114258, + "learning_rate": 6.346000000000001e-06, + "loss": 27.4512, + "step": 31730 + }, + { + "epoch": 0.06411680813843089, + "grad_norm": 358.0504150390625, + "learning_rate": 6.348000000000001e-06, + "loss": 34.4209, + "step": 31740 + }, + { + "epoch": 0.06413700877111471, + "grad_norm": 215.0141143798828, + "learning_rate": 6.35e-06, + "loss": 24.7617, + "step": 31750 + }, + { + "epoch": 0.06415720940379853, + "grad_norm": 670.1936645507812, + "learning_rate": 6.352e-06, + "loss": 18.105, + "step": 31760 + }, + { + "epoch": 0.06417741003648235, + "grad_norm": 350.1632385253906, + "learning_rate": 6.3540000000000005e-06, + "loss": 51.2723, + "step": 31770 + }, + { + "epoch": 0.06419761066916616, + "grad_norm": 300.8078308105469, + "learning_rate": 6.356000000000001e-06, + "loss": 48.8145, + "step": 31780 + }, + { + "epoch": 0.06421781130184998, + "grad_norm": 225.35494995117188, + "learning_rate": 6.358000000000001e-06, + "loss": 20.5653, + "step": 31790 + }, + { + "epoch": 0.0642380119345338, + "grad_norm": 263.5152893066406, + "learning_rate": 6.360000000000001e-06, + "loss": 34.9176, + "step": 31800 + }, + { + "epoch": 0.0642582125672176, + "grad_norm": 33.31119155883789, + "learning_rate": 6.362e-06, + "loss": 23.7906, + "step": 31810 + }, + { + "epoch": 0.06427841319990142, + "grad_norm": 763.8698120117188, + "learning_rate": 6.364e-06, + "loss": 23.5817, + "step": 31820 + }, + { + "epoch": 0.06429861383258524, + "grad_norm": 389.0904235839844, + "learning_rate": 6.366000000000001e-06, + "loss": 42.6483, + "step": 31830 + }, + { + "epoch": 0.06431881446526905, + "grad_norm": 371.7533264160156, + "learning_rate": 6.368000000000001e-06, + "loss": 9.3499, + "step": 31840 + }, + { + "epoch": 0.06433901509795287, + "grad_norm": 501.480712890625, + "learning_rate": 6.370000000000001e-06, + "loss": 38.5982, + "step": 31850 + }, + { + "epoch": 0.06435921573063669, + "grad_norm": 628.2760620117188, + "learning_rate": 6.372e-06, + "loss": 19.5583, + "step": 31860 + }, + { + "epoch": 0.0643794163633205, + "grad_norm": 151.403564453125, + "learning_rate": 6.374e-06, + "loss": 37.5179, + "step": 31870 + }, + { + "epoch": 0.06439961699600431, + "grad_norm": 260.3165283203125, + "learning_rate": 6.376e-06, + "loss": 16.6509, + "step": 31880 + }, + { + "epoch": 0.06441981762868813, + "grad_norm": 461.09088134765625, + "learning_rate": 6.378000000000001e-06, + "loss": 19.993, + "step": 31890 + }, + { + "epoch": 0.06444001826137194, + "grad_norm": 135.558349609375, + "learning_rate": 6.380000000000001e-06, + "loss": 28.1471, + "step": 31900 + }, + { + "epoch": 0.06446021889405576, + "grad_norm": 80.57373809814453, + "learning_rate": 6.382e-06, + "loss": 27.6211, + "step": 31910 + }, + { + "epoch": 0.06448041952673958, + "grad_norm": 178.90760803222656, + "learning_rate": 6.384e-06, + "loss": 28.3989, + "step": 31920 + }, + { + "epoch": 0.0645006201594234, + "grad_norm": 351.40704345703125, + "learning_rate": 6.386e-06, + "loss": 27.7896, + "step": 31930 + }, + { + "epoch": 0.0645208207921072, + "grad_norm": 311.6580810546875, + "learning_rate": 6.3880000000000005e-06, + "loss": 41.1878, + "step": 31940 + }, + { + "epoch": 0.06454102142479103, + "grad_norm": 308.5960388183594, + "learning_rate": 6.390000000000001e-06, + "loss": 22.5207, + "step": 31950 + }, + { + "epoch": 0.06456122205747485, + "grad_norm": 228.21206665039062, + "learning_rate": 6.392000000000001e-06, + "loss": 27.872, + "step": 31960 + }, + { + "epoch": 0.06458142269015865, + "grad_norm": 795.2063598632812, + "learning_rate": 6.394e-06, + "loss": 35.4486, + "step": 31970 + }, + { + "epoch": 0.06460162332284247, + "grad_norm": 408.62799072265625, + "learning_rate": 6.396e-06, + "loss": 21.6402, + "step": 31980 + }, + { + "epoch": 0.06462182395552629, + "grad_norm": 280.4447021484375, + "learning_rate": 6.398000000000001e-06, + "loss": 43.0083, + "step": 31990 + }, + { + "epoch": 0.0646420245882101, + "grad_norm": 950.8634033203125, + "learning_rate": 6.4000000000000006e-06, + "loss": 31.1654, + "step": 32000 + }, + { + "epoch": 0.06466222522089392, + "grad_norm": 231.40867614746094, + "learning_rate": 6.402000000000001e-06, + "loss": 28.7127, + "step": 32010 + }, + { + "epoch": 0.06468242585357774, + "grad_norm": 0.0, + "learning_rate": 6.404e-06, + "loss": 38.1864, + "step": 32020 + }, + { + "epoch": 0.06470262648626154, + "grad_norm": 947.143310546875, + "learning_rate": 6.406e-06, + "loss": 37.0088, + "step": 32030 + }, + { + "epoch": 0.06472282711894536, + "grad_norm": 140.33401489257812, + "learning_rate": 6.408000000000001e-06, + "loss": 30.061, + "step": 32040 + }, + { + "epoch": 0.06474302775162918, + "grad_norm": 315.9873962402344, + "learning_rate": 6.4100000000000005e-06, + "loss": 35.755, + "step": 32050 + }, + { + "epoch": 0.06476322838431299, + "grad_norm": 656.0057983398438, + "learning_rate": 6.412000000000001e-06, + "loss": 34.4754, + "step": 32060 + }, + { + "epoch": 0.06478342901699681, + "grad_norm": 739.964111328125, + "learning_rate": 6.414e-06, + "loss": 42.6976, + "step": 32070 + }, + { + "epoch": 0.06480362964968063, + "grad_norm": 267.9989013671875, + "learning_rate": 6.416e-06, + "loss": 41.9579, + "step": 32080 + }, + { + "epoch": 0.06482383028236445, + "grad_norm": 375.5707702636719, + "learning_rate": 6.418000000000001e-06, + "loss": 21.0082, + "step": 32090 + }, + { + "epoch": 0.06484403091504826, + "grad_norm": 374.5641174316406, + "learning_rate": 6.42e-06, + "loss": 32.6614, + "step": 32100 + }, + { + "epoch": 0.06486423154773208, + "grad_norm": 322.2223205566406, + "learning_rate": 6.4220000000000005e-06, + "loss": 28.5509, + "step": 32110 + }, + { + "epoch": 0.0648844321804159, + "grad_norm": 175.63418579101562, + "learning_rate": 6.424e-06, + "loss": 18.4089, + "step": 32120 + }, + { + "epoch": 0.0649046328130997, + "grad_norm": 450.5000915527344, + "learning_rate": 6.426e-06, + "loss": 28.6365, + "step": 32130 + }, + { + "epoch": 0.06492483344578352, + "grad_norm": 251.46981811523438, + "learning_rate": 6.428000000000001e-06, + "loss": 26.0443, + "step": 32140 + }, + { + "epoch": 0.06494503407846734, + "grad_norm": 338.2344665527344, + "learning_rate": 6.43e-06, + "loss": 13.9824, + "step": 32150 + }, + { + "epoch": 0.06496523471115115, + "grad_norm": 489.648681640625, + "learning_rate": 6.432e-06, + "loss": 25.5297, + "step": 32160 + }, + { + "epoch": 0.06498543534383497, + "grad_norm": 405.80755615234375, + "learning_rate": 6.4340000000000006e-06, + "loss": 35.4353, + "step": 32170 + }, + { + "epoch": 0.06500563597651879, + "grad_norm": 225.9192352294922, + "learning_rate": 6.436e-06, + "loss": 25.5489, + "step": 32180 + }, + { + "epoch": 0.0650258366092026, + "grad_norm": 356.1609191894531, + "learning_rate": 6.438000000000001e-06, + "loss": 24.6689, + "step": 32190 + }, + { + "epoch": 0.06504603724188641, + "grad_norm": 813.9902954101562, + "learning_rate": 6.440000000000001e-06, + "loss": 31.878, + "step": 32200 + }, + { + "epoch": 0.06506623787457023, + "grad_norm": 503.1162414550781, + "learning_rate": 6.442e-06, + "loss": 26.8903, + "step": 32210 + }, + { + "epoch": 0.06508643850725404, + "grad_norm": 291.009521484375, + "learning_rate": 6.4440000000000005e-06, + "loss": 24.9688, + "step": 32220 + }, + { + "epoch": 0.06510663913993786, + "grad_norm": 366.4163818359375, + "learning_rate": 6.446e-06, + "loss": 27.8312, + "step": 32230 + }, + { + "epoch": 0.06512683977262168, + "grad_norm": 602.8341674804688, + "learning_rate": 6.448000000000001e-06, + "loss": 22.8418, + "step": 32240 + }, + { + "epoch": 0.0651470404053055, + "grad_norm": 767.76953125, + "learning_rate": 6.450000000000001e-06, + "loss": 36.1939, + "step": 32250 + }, + { + "epoch": 0.0651672410379893, + "grad_norm": 0.0, + "learning_rate": 6.452e-06, + "loss": 23.4851, + "step": 32260 + }, + { + "epoch": 0.06518744167067313, + "grad_norm": 255.6028594970703, + "learning_rate": 6.454e-06, + "loss": 33.1797, + "step": 32270 + }, + { + "epoch": 0.06520764230335695, + "grad_norm": 236.62069702148438, + "learning_rate": 6.456e-06, + "loss": 44.0498, + "step": 32280 + }, + { + "epoch": 0.06522784293604075, + "grad_norm": 281.0309143066406, + "learning_rate": 6.458000000000001e-06, + "loss": 35.3185, + "step": 32290 + }, + { + "epoch": 0.06524804356872457, + "grad_norm": 532.2522583007812, + "learning_rate": 6.460000000000001e-06, + "loss": 24.893, + "step": 32300 + }, + { + "epoch": 0.06526824420140839, + "grad_norm": 290.1950988769531, + "learning_rate": 6.462e-06, + "loss": 23.176, + "step": 32310 + }, + { + "epoch": 0.0652884448340922, + "grad_norm": 836.5287475585938, + "learning_rate": 6.464e-06, + "loss": 27.6402, + "step": 32320 + }, + { + "epoch": 0.06530864546677602, + "grad_norm": 125.80975341796875, + "learning_rate": 6.4660000000000004e-06, + "loss": 36.2398, + "step": 32330 + }, + { + "epoch": 0.06532884609945984, + "grad_norm": 180.412353515625, + "learning_rate": 6.468000000000001e-06, + "loss": 32.0698, + "step": 32340 + }, + { + "epoch": 0.06534904673214365, + "grad_norm": 772.2728271484375, + "learning_rate": 6.470000000000001e-06, + "loss": 17.634, + "step": 32350 + }, + { + "epoch": 0.06536924736482747, + "grad_norm": 126.90397644042969, + "learning_rate": 6.472000000000001e-06, + "loss": 20.8186, + "step": 32360 + }, + { + "epoch": 0.06538944799751129, + "grad_norm": 134.90330505371094, + "learning_rate": 6.474e-06, + "loss": 20.7609, + "step": 32370 + }, + { + "epoch": 0.06540964863019509, + "grad_norm": 316.5060119628906, + "learning_rate": 6.476e-06, + "loss": 35.8064, + "step": 32380 + }, + { + "epoch": 0.06542984926287891, + "grad_norm": 235.25607299804688, + "learning_rate": 6.478000000000001e-06, + "loss": 26.0847, + "step": 32390 + }, + { + "epoch": 0.06545004989556273, + "grad_norm": 132.6982421875, + "learning_rate": 6.480000000000001e-06, + "loss": 18.1465, + "step": 32400 + }, + { + "epoch": 0.06547025052824655, + "grad_norm": 556.5181884765625, + "learning_rate": 6.482000000000001e-06, + "loss": 41.7568, + "step": 32410 + }, + { + "epoch": 0.06549045116093036, + "grad_norm": 319.8631896972656, + "learning_rate": 6.484e-06, + "loss": 34.7693, + "step": 32420 + }, + { + "epoch": 0.06551065179361418, + "grad_norm": 32.90923309326172, + "learning_rate": 6.486e-06, + "loss": 31.509, + "step": 32430 + }, + { + "epoch": 0.065530852426298, + "grad_norm": 227.41973876953125, + "learning_rate": 6.488000000000001e-06, + "loss": 27.032, + "step": 32440 + }, + { + "epoch": 0.0655510530589818, + "grad_norm": 784.7890625, + "learning_rate": 6.4900000000000005e-06, + "loss": 42.1183, + "step": 32450 + }, + { + "epoch": 0.06557125369166562, + "grad_norm": 265.337890625, + "learning_rate": 6.492000000000001e-06, + "loss": 33.2225, + "step": 32460 + }, + { + "epoch": 0.06559145432434944, + "grad_norm": 508.2408142089844, + "learning_rate": 6.494e-06, + "loss": 20.6025, + "step": 32470 + }, + { + "epoch": 0.06561165495703325, + "grad_norm": 342.9792785644531, + "learning_rate": 6.496e-06, + "loss": 34.3402, + "step": 32480 + }, + { + "epoch": 0.06563185558971707, + "grad_norm": 257.61163330078125, + "learning_rate": 6.498000000000001e-06, + "loss": 32.8761, + "step": 32490 + }, + { + "epoch": 0.06565205622240089, + "grad_norm": 117.67272186279297, + "learning_rate": 6.5000000000000004e-06, + "loss": 12.7806, + "step": 32500 + }, + { + "epoch": 0.0656722568550847, + "grad_norm": 225.24710083007812, + "learning_rate": 6.502000000000001e-06, + "loss": 50.6284, + "step": 32510 + }, + { + "epoch": 0.06569245748776852, + "grad_norm": 237.55392456054688, + "learning_rate": 6.504e-06, + "loss": 24.6255, + "step": 32520 + }, + { + "epoch": 0.06571265812045234, + "grad_norm": 369.5683288574219, + "learning_rate": 6.506e-06, + "loss": 30.3659, + "step": 32530 + }, + { + "epoch": 0.06573285875313614, + "grad_norm": 262.12298583984375, + "learning_rate": 6.508000000000001e-06, + "loss": 39.7136, + "step": 32540 + }, + { + "epoch": 0.06575305938581996, + "grad_norm": 325.452392578125, + "learning_rate": 6.51e-06, + "loss": 51.158, + "step": 32550 + }, + { + "epoch": 0.06577326001850378, + "grad_norm": 0.0, + "learning_rate": 6.5120000000000005e-06, + "loss": 44.0087, + "step": 32560 + }, + { + "epoch": 0.0657934606511876, + "grad_norm": 320.4726867675781, + "learning_rate": 6.514000000000001e-06, + "loss": 28.109, + "step": 32570 + }, + { + "epoch": 0.06581366128387141, + "grad_norm": 147.89549255371094, + "learning_rate": 6.516e-06, + "loss": 32.9733, + "step": 32580 + }, + { + "epoch": 0.06583386191655523, + "grad_norm": 506.1364440917969, + "learning_rate": 6.518000000000001e-06, + "loss": 47.3512, + "step": 32590 + }, + { + "epoch": 0.06585406254923905, + "grad_norm": 381.97509765625, + "learning_rate": 6.520000000000001e-06, + "loss": 39.4338, + "step": 32600 + }, + { + "epoch": 0.06587426318192285, + "grad_norm": 173.8516082763672, + "learning_rate": 6.522e-06, + "loss": 32.7929, + "step": 32610 + }, + { + "epoch": 0.06589446381460667, + "grad_norm": 489.1348571777344, + "learning_rate": 6.5240000000000006e-06, + "loss": 22.801, + "step": 32620 + }, + { + "epoch": 0.0659146644472905, + "grad_norm": 789.7333984375, + "learning_rate": 6.526e-06, + "loss": 33.7686, + "step": 32630 + }, + { + "epoch": 0.0659348650799743, + "grad_norm": 449.74951171875, + "learning_rate": 6.528000000000001e-06, + "loss": 25.6868, + "step": 32640 + }, + { + "epoch": 0.06595506571265812, + "grad_norm": 330.71246337890625, + "learning_rate": 6.530000000000001e-06, + "loss": 43.2862, + "step": 32650 + }, + { + "epoch": 0.06597526634534194, + "grad_norm": 230.9396209716797, + "learning_rate": 6.532e-06, + "loss": 34.1926, + "step": 32660 + }, + { + "epoch": 0.06599546697802575, + "grad_norm": 439.2969055175781, + "learning_rate": 6.5340000000000005e-06, + "loss": 25.3017, + "step": 32670 + }, + { + "epoch": 0.06601566761070957, + "grad_norm": 490.4678955078125, + "learning_rate": 6.536e-06, + "loss": 39.2081, + "step": 32680 + }, + { + "epoch": 0.06603586824339339, + "grad_norm": 196.3701934814453, + "learning_rate": 6.538000000000001e-06, + "loss": 41.4055, + "step": 32690 + }, + { + "epoch": 0.06605606887607719, + "grad_norm": 615.54736328125, + "learning_rate": 6.540000000000001e-06, + "loss": 43.3943, + "step": 32700 + }, + { + "epoch": 0.06607626950876101, + "grad_norm": 73.74063873291016, + "learning_rate": 6.542e-06, + "loss": 32.3963, + "step": 32710 + }, + { + "epoch": 0.06609647014144483, + "grad_norm": 108.1066665649414, + "learning_rate": 6.544e-06, + "loss": 21.0127, + "step": 32720 + }, + { + "epoch": 0.06611667077412865, + "grad_norm": 289.84039306640625, + "learning_rate": 6.5460000000000005e-06, + "loss": 31.1427, + "step": 32730 + }, + { + "epoch": 0.06613687140681246, + "grad_norm": 168.12002563476562, + "learning_rate": 6.548000000000001e-06, + "loss": 19.6645, + "step": 32740 + }, + { + "epoch": 0.06615707203949628, + "grad_norm": 339.53936767578125, + "learning_rate": 6.550000000000001e-06, + "loss": 30.5839, + "step": 32750 + }, + { + "epoch": 0.0661772726721801, + "grad_norm": 383.97412109375, + "learning_rate": 6.552000000000001e-06, + "loss": 32.9635, + "step": 32760 + }, + { + "epoch": 0.0661974733048639, + "grad_norm": 218.21884155273438, + "learning_rate": 6.554e-06, + "loss": 27.9132, + "step": 32770 + }, + { + "epoch": 0.06621767393754772, + "grad_norm": 58.37517547607422, + "learning_rate": 6.556e-06, + "loss": 30.3665, + "step": 32780 + }, + { + "epoch": 0.06623787457023154, + "grad_norm": 399.6036682128906, + "learning_rate": 6.558000000000001e-06, + "loss": 17.2957, + "step": 32790 + }, + { + "epoch": 0.06625807520291535, + "grad_norm": 385.6497497558594, + "learning_rate": 6.560000000000001e-06, + "loss": 29.2142, + "step": 32800 + }, + { + "epoch": 0.06627827583559917, + "grad_norm": 294.7921142578125, + "learning_rate": 6.562000000000001e-06, + "loss": 27.5119, + "step": 32810 + }, + { + "epoch": 0.06629847646828299, + "grad_norm": 809.83740234375, + "learning_rate": 6.564e-06, + "loss": 31.7384, + "step": 32820 + }, + { + "epoch": 0.0663186771009668, + "grad_norm": 337.61181640625, + "learning_rate": 6.566e-06, + "loss": 33.6985, + "step": 32830 + }, + { + "epoch": 0.06633887773365062, + "grad_norm": 579.421142578125, + "learning_rate": 6.568000000000001e-06, + "loss": 19.8273, + "step": 32840 + }, + { + "epoch": 0.06635907836633444, + "grad_norm": 186.95729064941406, + "learning_rate": 6.570000000000001e-06, + "loss": 15.9819, + "step": 32850 + }, + { + "epoch": 0.06637927899901824, + "grad_norm": 558.0016479492188, + "learning_rate": 6.572000000000001e-06, + "loss": 20.1292, + "step": 32860 + }, + { + "epoch": 0.06639947963170206, + "grad_norm": 165.0709686279297, + "learning_rate": 6.574e-06, + "loss": 28.7833, + "step": 32870 + }, + { + "epoch": 0.06641968026438588, + "grad_norm": 327.4534606933594, + "learning_rate": 6.576e-06, + "loss": 22.7556, + "step": 32880 + }, + { + "epoch": 0.0664398808970697, + "grad_norm": 159.4688262939453, + "learning_rate": 6.578000000000001e-06, + "loss": 28.5168, + "step": 32890 + }, + { + "epoch": 0.06646008152975351, + "grad_norm": 89.40301513671875, + "learning_rate": 6.5800000000000005e-06, + "loss": 19.6373, + "step": 32900 + }, + { + "epoch": 0.06648028216243733, + "grad_norm": 228.87832641601562, + "learning_rate": 6.582000000000001e-06, + "loss": 20.3056, + "step": 32910 + }, + { + "epoch": 0.06650048279512115, + "grad_norm": 63.90678024291992, + "learning_rate": 6.584e-06, + "loss": 28.0314, + "step": 32920 + }, + { + "epoch": 0.06652068342780496, + "grad_norm": 383.80682373046875, + "learning_rate": 6.586e-06, + "loss": 26.2479, + "step": 32930 + }, + { + "epoch": 0.06654088406048878, + "grad_norm": 657.4545288085938, + "learning_rate": 6.588000000000001e-06, + "loss": 31.423, + "step": 32940 + }, + { + "epoch": 0.0665610846931726, + "grad_norm": 375.2959289550781, + "learning_rate": 6.5900000000000004e-06, + "loss": 18.3765, + "step": 32950 + }, + { + "epoch": 0.0665812853258564, + "grad_norm": 262.6024475097656, + "learning_rate": 6.592000000000001e-06, + "loss": 27.5531, + "step": 32960 + }, + { + "epoch": 0.06660148595854022, + "grad_norm": 251.27822875976562, + "learning_rate": 6.594000000000001e-06, + "loss": 23.6339, + "step": 32970 + }, + { + "epoch": 0.06662168659122404, + "grad_norm": 354.53167724609375, + "learning_rate": 6.596e-06, + "loss": 21.4067, + "step": 32980 + }, + { + "epoch": 0.06664188722390785, + "grad_norm": 223.07188415527344, + "learning_rate": 6.598000000000001e-06, + "loss": 20.9345, + "step": 32990 + }, + { + "epoch": 0.06666208785659167, + "grad_norm": 335.9605712890625, + "learning_rate": 6.600000000000001e-06, + "loss": 40.661, + "step": 33000 + }, + { + "epoch": 0.06668228848927549, + "grad_norm": 203.1583251953125, + "learning_rate": 6.6020000000000005e-06, + "loss": 28.0269, + "step": 33010 + }, + { + "epoch": 0.0667024891219593, + "grad_norm": 370.2474670410156, + "learning_rate": 6.604000000000001e-06, + "loss": 23.4031, + "step": 33020 + }, + { + "epoch": 0.06672268975464311, + "grad_norm": 215.4385986328125, + "learning_rate": 6.606e-06, + "loss": 18.5859, + "step": 33030 + }, + { + "epoch": 0.06674289038732693, + "grad_norm": 80.94508361816406, + "learning_rate": 6.608000000000001e-06, + "loss": 34.3004, + "step": 33040 + }, + { + "epoch": 0.06676309102001075, + "grad_norm": 636.4049682617188, + "learning_rate": 6.610000000000001e-06, + "loss": 37.3925, + "step": 33050 + }, + { + "epoch": 0.06678329165269456, + "grad_norm": 375.9504699707031, + "learning_rate": 6.612e-06, + "loss": 15.6265, + "step": 33060 + }, + { + "epoch": 0.06680349228537838, + "grad_norm": 197.15830993652344, + "learning_rate": 6.6140000000000005e-06, + "loss": 18.0247, + "step": 33070 + }, + { + "epoch": 0.0668236929180622, + "grad_norm": 266.53692626953125, + "learning_rate": 6.616e-06, + "loss": 22.4474, + "step": 33080 + }, + { + "epoch": 0.066843893550746, + "grad_norm": 581.1676025390625, + "learning_rate": 6.618000000000001e-06, + "loss": 36.025, + "step": 33090 + }, + { + "epoch": 0.06686409418342983, + "grad_norm": 386.5500793457031, + "learning_rate": 6.620000000000001e-06, + "loss": 19.8663, + "step": 33100 + }, + { + "epoch": 0.06688429481611365, + "grad_norm": 96.17058563232422, + "learning_rate": 6.622e-06, + "loss": 33.4399, + "step": 33110 + }, + { + "epoch": 0.06690449544879745, + "grad_norm": 459.32244873046875, + "learning_rate": 6.6240000000000004e-06, + "loss": 20.7274, + "step": 33120 + }, + { + "epoch": 0.06692469608148127, + "grad_norm": 77.63929748535156, + "learning_rate": 6.626000000000001e-06, + "loss": 31.7773, + "step": 33130 + }, + { + "epoch": 0.06694489671416509, + "grad_norm": 224.4654998779297, + "learning_rate": 6.628e-06, + "loss": 24.23, + "step": 33140 + }, + { + "epoch": 0.0669650973468489, + "grad_norm": 627.4200439453125, + "learning_rate": 6.630000000000001e-06, + "loss": 39.5084, + "step": 33150 + }, + { + "epoch": 0.06698529797953272, + "grad_norm": 551.1626586914062, + "learning_rate": 6.632000000000001e-06, + "loss": 22.4483, + "step": 33160 + }, + { + "epoch": 0.06700549861221654, + "grad_norm": 250.13424682617188, + "learning_rate": 6.634e-06, + "loss": 29.2267, + "step": 33170 + }, + { + "epoch": 0.06702569924490034, + "grad_norm": 604.459716796875, + "learning_rate": 6.6360000000000005e-06, + "loss": 32.5056, + "step": 33180 + }, + { + "epoch": 0.06704589987758416, + "grad_norm": 236.87158203125, + "learning_rate": 6.638e-06, + "loss": 47.2783, + "step": 33190 + }, + { + "epoch": 0.06706610051026798, + "grad_norm": 313.0286865234375, + "learning_rate": 6.640000000000001e-06, + "loss": 14.8039, + "step": 33200 + }, + { + "epoch": 0.0670863011429518, + "grad_norm": 989.0570678710938, + "learning_rate": 6.642000000000001e-06, + "loss": 34.107, + "step": 33210 + }, + { + "epoch": 0.06710650177563561, + "grad_norm": 192.0631561279297, + "learning_rate": 6.644e-06, + "loss": 27.1685, + "step": 33220 + }, + { + "epoch": 0.06712670240831943, + "grad_norm": 255.91061401367188, + "learning_rate": 6.646e-06, + "loss": 22.7502, + "step": 33230 + }, + { + "epoch": 0.06714690304100325, + "grad_norm": 518.427734375, + "learning_rate": 6.648e-06, + "loss": 34.9704, + "step": 33240 + }, + { + "epoch": 0.06716710367368706, + "grad_norm": 560.7759399414062, + "learning_rate": 6.650000000000001e-06, + "loss": 39.8107, + "step": 33250 + }, + { + "epoch": 0.06718730430637088, + "grad_norm": 434.3753662109375, + "learning_rate": 6.652000000000001e-06, + "loss": 33.0775, + "step": 33260 + }, + { + "epoch": 0.0672075049390547, + "grad_norm": 322.3213195800781, + "learning_rate": 6.654e-06, + "loss": 30.8232, + "step": 33270 + }, + { + "epoch": 0.0672277055717385, + "grad_norm": 388.7810363769531, + "learning_rate": 6.656e-06, + "loss": 50.0331, + "step": 33280 + }, + { + "epoch": 0.06724790620442232, + "grad_norm": 631.3594360351562, + "learning_rate": 6.658e-06, + "loss": 30.9435, + "step": 33290 + }, + { + "epoch": 0.06726810683710614, + "grad_norm": 185.04061889648438, + "learning_rate": 6.660000000000001e-06, + "loss": 21.1964, + "step": 33300 + }, + { + "epoch": 0.06728830746978995, + "grad_norm": 298.79351806640625, + "learning_rate": 6.662000000000001e-06, + "loss": 15.2041, + "step": 33310 + }, + { + "epoch": 0.06730850810247377, + "grad_norm": 249.6124267578125, + "learning_rate": 6.664e-06, + "loss": 31.553, + "step": 33320 + }, + { + "epoch": 0.06732870873515759, + "grad_norm": 262.3807067871094, + "learning_rate": 6.666e-06, + "loss": 27.6135, + "step": 33330 + }, + { + "epoch": 0.0673489093678414, + "grad_norm": 130.87686157226562, + "learning_rate": 6.668e-06, + "loss": 30.4478, + "step": 33340 + }, + { + "epoch": 0.06736911000052521, + "grad_norm": 285.0330810546875, + "learning_rate": 6.6700000000000005e-06, + "loss": 21.2741, + "step": 33350 + }, + { + "epoch": 0.06738931063320903, + "grad_norm": 237.6591796875, + "learning_rate": 6.672000000000001e-06, + "loss": 27.5899, + "step": 33360 + }, + { + "epoch": 0.06740951126589285, + "grad_norm": 379.7192077636719, + "learning_rate": 6.674000000000001e-06, + "loss": 21.7786, + "step": 33370 + }, + { + "epoch": 0.06742971189857666, + "grad_norm": 126.19786071777344, + "learning_rate": 6.676e-06, + "loss": 36.2694, + "step": 33380 + }, + { + "epoch": 0.06744991253126048, + "grad_norm": 159.53851318359375, + "learning_rate": 6.678e-06, + "loss": 26.82, + "step": 33390 + }, + { + "epoch": 0.0674701131639443, + "grad_norm": 114.0098876953125, + "learning_rate": 6.680000000000001e-06, + "loss": 28.3109, + "step": 33400 + }, + { + "epoch": 0.0674903137966281, + "grad_norm": 60.16526794433594, + "learning_rate": 6.6820000000000006e-06, + "loss": 27.0503, + "step": 33410 + }, + { + "epoch": 0.06751051442931193, + "grad_norm": 110.24915313720703, + "learning_rate": 6.684000000000001e-06, + "loss": 24.2053, + "step": 33420 + }, + { + "epoch": 0.06753071506199575, + "grad_norm": 329.45391845703125, + "learning_rate": 6.686e-06, + "loss": 31.3305, + "step": 33430 + }, + { + "epoch": 0.06755091569467955, + "grad_norm": 333.7715148925781, + "learning_rate": 6.688e-06, + "loss": 23.5648, + "step": 33440 + }, + { + "epoch": 0.06757111632736337, + "grad_norm": 99.59442138671875, + "learning_rate": 6.690000000000001e-06, + "loss": 34.6192, + "step": 33450 + }, + { + "epoch": 0.06759131696004719, + "grad_norm": 430.8699035644531, + "learning_rate": 6.6920000000000005e-06, + "loss": 26.0351, + "step": 33460 + }, + { + "epoch": 0.067611517592731, + "grad_norm": 340.1473693847656, + "learning_rate": 6.694000000000001e-06, + "loss": 26.3428, + "step": 33470 + }, + { + "epoch": 0.06763171822541482, + "grad_norm": 135.01898193359375, + "learning_rate": 6.696e-06, + "loss": 24.8854, + "step": 33480 + }, + { + "epoch": 0.06765191885809864, + "grad_norm": 373.7020568847656, + "learning_rate": 6.698e-06, + "loss": 29.3576, + "step": 33490 + }, + { + "epoch": 0.06767211949078245, + "grad_norm": 249.206787109375, + "learning_rate": 6.700000000000001e-06, + "loss": 22.7511, + "step": 33500 + }, + { + "epoch": 0.06769232012346627, + "grad_norm": 780.90576171875, + "learning_rate": 6.702e-06, + "loss": 29.4561, + "step": 33510 + }, + { + "epoch": 0.06771252075615009, + "grad_norm": 396.869384765625, + "learning_rate": 6.7040000000000005e-06, + "loss": 18.4295, + "step": 33520 + }, + { + "epoch": 0.06773272138883389, + "grad_norm": 549.2235717773438, + "learning_rate": 6.706000000000001e-06, + "loss": 29.0248, + "step": 33530 + }, + { + "epoch": 0.06775292202151771, + "grad_norm": 343.7025146484375, + "learning_rate": 6.708e-06, + "loss": 24.1803, + "step": 33540 + }, + { + "epoch": 0.06777312265420153, + "grad_norm": 306.8595886230469, + "learning_rate": 6.710000000000001e-06, + "loss": 22.515, + "step": 33550 + }, + { + "epoch": 0.06779332328688535, + "grad_norm": 243.19854736328125, + "learning_rate": 6.712000000000001e-06, + "loss": 40.1751, + "step": 33560 + }, + { + "epoch": 0.06781352391956916, + "grad_norm": 265.84185791015625, + "learning_rate": 6.7140000000000004e-06, + "loss": 24.2147, + "step": 33570 + }, + { + "epoch": 0.06783372455225298, + "grad_norm": 432.4148254394531, + "learning_rate": 6.716000000000001e-06, + "loss": 29.5186, + "step": 33580 + }, + { + "epoch": 0.0678539251849368, + "grad_norm": 140.1952362060547, + "learning_rate": 6.718e-06, + "loss": 31.6897, + "step": 33590 + }, + { + "epoch": 0.0678741258176206, + "grad_norm": 488.43701171875, + "learning_rate": 6.720000000000001e-06, + "loss": 29.2323, + "step": 33600 + }, + { + "epoch": 0.06789432645030442, + "grad_norm": 271.6667175292969, + "learning_rate": 6.722000000000001e-06, + "loss": 63.0262, + "step": 33610 + }, + { + "epoch": 0.06791452708298824, + "grad_norm": 726.9092407226562, + "learning_rate": 6.724e-06, + "loss": 43.2171, + "step": 33620 + }, + { + "epoch": 0.06793472771567205, + "grad_norm": 896.5654907226562, + "learning_rate": 6.7260000000000005e-06, + "loss": 33.0095, + "step": 33630 + }, + { + "epoch": 0.06795492834835587, + "grad_norm": 138.67282104492188, + "learning_rate": 6.728e-06, + "loss": 23.7521, + "step": 33640 + }, + { + "epoch": 0.06797512898103969, + "grad_norm": 308.3063659667969, + "learning_rate": 6.730000000000001e-06, + "loss": 26.7069, + "step": 33650 + }, + { + "epoch": 0.0679953296137235, + "grad_norm": 545.8862915039062, + "learning_rate": 6.732000000000001e-06, + "loss": 26.4732, + "step": 33660 + }, + { + "epoch": 0.06801553024640732, + "grad_norm": 162.01895141601562, + "learning_rate": 6.734e-06, + "loss": 20.6606, + "step": 33670 + }, + { + "epoch": 0.06803573087909114, + "grad_norm": 446.10760498046875, + "learning_rate": 6.736e-06, + "loss": 35.1819, + "step": 33680 + }, + { + "epoch": 0.06805593151177494, + "grad_norm": 197.63351440429688, + "learning_rate": 6.738e-06, + "loss": 32.4311, + "step": 33690 + }, + { + "epoch": 0.06807613214445876, + "grad_norm": 669.9923095703125, + "learning_rate": 6.740000000000001e-06, + "loss": 34.6298, + "step": 33700 + }, + { + "epoch": 0.06809633277714258, + "grad_norm": 251.99420166015625, + "learning_rate": 6.742000000000001e-06, + "loss": 20.4015, + "step": 33710 + }, + { + "epoch": 0.0681165334098264, + "grad_norm": 1387.9864501953125, + "learning_rate": 6.744e-06, + "loss": 43.5895, + "step": 33720 + }, + { + "epoch": 0.06813673404251021, + "grad_norm": 466.648681640625, + "learning_rate": 6.746e-06, + "loss": 50.2172, + "step": 33730 + }, + { + "epoch": 0.06815693467519403, + "grad_norm": 296.6073913574219, + "learning_rate": 6.7480000000000004e-06, + "loss": 31.7947, + "step": 33740 + }, + { + "epoch": 0.06817713530787785, + "grad_norm": 183.97279357910156, + "learning_rate": 6.750000000000001e-06, + "loss": 22.4225, + "step": 33750 + }, + { + "epoch": 0.06819733594056165, + "grad_norm": 541.4666748046875, + "learning_rate": 6.752000000000001e-06, + "loss": 33.7121, + "step": 33760 + }, + { + "epoch": 0.06821753657324547, + "grad_norm": 422.0052185058594, + "learning_rate": 6.754000000000001e-06, + "loss": 22.4945, + "step": 33770 + }, + { + "epoch": 0.0682377372059293, + "grad_norm": 290.2746276855469, + "learning_rate": 6.756e-06, + "loss": 24.8973, + "step": 33780 + }, + { + "epoch": 0.0682579378386131, + "grad_norm": 307.8942565917969, + "learning_rate": 6.758e-06, + "loss": 28.1959, + "step": 33790 + }, + { + "epoch": 0.06827813847129692, + "grad_norm": 43.59174346923828, + "learning_rate": 6.760000000000001e-06, + "loss": 34.4528, + "step": 33800 + }, + { + "epoch": 0.06829833910398074, + "grad_norm": 489.6438293457031, + "learning_rate": 6.762000000000001e-06, + "loss": 30.3156, + "step": 33810 + }, + { + "epoch": 0.06831853973666455, + "grad_norm": 204.8790740966797, + "learning_rate": 6.764000000000001e-06, + "loss": 25.7045, + "step": 33820 + }, + { + "epoch": 0.06833874036934837, + "grad_norm": 406.2287902832031, + "learning_rate": 6.766e-06, + "loss": 31.5095, + "step": 33830 + }, + { + "epoch": 0.06835894100203219, + "grad_norm": 279.4950866699219, + "learning_rate": 6.768e-06, + "loss": 29.7517, + "step": 33840 + }, + { + "epoch": 0.06837914163471599, + "grad_norm": 928.823486328125, + "learning_rate": 6.770000000000001e-06, + "loss": 41.224, + "step": 33850 + }, + { + "epoch": 0.06839934226739981, + "grad_norm": 317.24188232421875, + "learning_rate": 6.7720000000000006e-06, + "loss": 19.2596, + "step": 33860 + }, + { + "epoch": 0.06841954290008363, + "grad_norm": 297.966552734375, + "learning_rate": 6.774000000000001e-06, + "loss": 38.4275, + "step": 33870 + }, + { + "epoch": 0.06843974353276745, + "grad_norm": 214.70057678222656, + "learning_rate": 6.776e-06, + "loss": 33.0122, + "step": 33880 + }, + { + "epoch": 0.06845994416545126, + "grad_norm": 333.35205078125, + "learning_rate": 6.778e-06, + "loss": 22.0829, + "step": 33890 + }, + { + "epoch": 0.06848014479813508, + "grad_norm": 229.147216796875, + "learning_rate": 6.780000000000001e-06, + "loss": 22.9107, + "step": 33900 + }, + { + "epoch": 0.0685003454308189, + "grad_norm": 262.1947326660156, + "learning_rate": 6.7820000000000005e-06, + "loss": 48.3566, + "step": 33910 + }, + { + "epoch": 0.0685205460635027, + "grad_norm": 332.7762451171875, + "learning_rate": 6.784000000000001e-06, + "loss": 27.2738, + "step": 33920 + }, + { + "epoch": 0.06854074669618652, + "grad_norm": 184.263427734375, + "learning_rate": 6.786000000000001e-06, + "loss": 31.9823, + "step": 33930 + }, + { + "epoch": 0.06856094732887034, + "grad_norm": 719.2918090820312, + "learning_rate": 6.788e-06, + "loss": 38.1228, + "step": 33940 + }, + { + "epoch": 0.06858114796155415, + "grad_norm": 319.15362548828125, + "learning_rate": 6.790000000000001e-06, + "loss": 25.3327, + "step": 33950 + }, + { + "epoch": 0.06860134859423797, + "grad_norm": 182.20993041992188, + "learning_rate": 6.792000000000001e-06, + "loss": 34.7985, + "step": 33960 + }, + { + "epoch": 0.06862154922692179, + "grad_norm": 426.1497802734375, + "learning_rate": 6.7940000000000005e-06, + "loss": 35.3077, + "step": 33970 + }, + { + "epoch": 0.0686417498596056, + "grad_norm": 167.3736114501953, + "learning_rate": 6.796000000000001e-06, + "loss": 41.2279, + "step": 33980 + }, + { + "epoch": 0.06866195049228942, + "grad_norm": 0.0, + "learning_rate": 6.798e-06, + "loss": 69.9124, + "step": 33990 + }, + { + "epoch": 0.06868215112497324, + "grad_norm": 205.83233642578125, + "learning_rate": 6.800000000000001e-06, + "loss": 27.4275, + "step": 34000 + }, + { + "epoch": 0.06870235175765704, + "grad_norm": 229.70562744140625, + "learning_rate": 6.802000000000001e-06, + "loss": 27.7193, + "step": 34010 + }, + { + "epoch": 0.06872255239034086, + "grad_norm": 277.613037109375, + "learning_rate": 6.804e-06, + "loss": 33.3919, + "step": 34020 + }, + { + "epoch": 0.06874275302302468, + "grad_norm": 455.9620056152344, + "learning_rate": 6.8060000000000006e-06, + "loss": 31.3386, + "step": 34030 + }, + { + "epoch": 0.0687629536557085, + "grad_norm": 76.26141357421875, + "learning_rate": 6.808e-06, + "loss": 22.4672, + "step": 34040 + }, + { + "epoch": 0.06878315428839231, + "grad_norm": 1023.4114379882812, + "learning_rate": 6.810000000000001e-06, + "loss": 28.7594, + "step": 34050 + }, + { + "epoch": 0.06880335492107613, + "grad_norm": 261.6186828613281, + "learning_rate": 6.812000000000001e-06, + "loss": 37.1378, + "step": 34060 + }, + { + "epoch": 0.06882355555375995, + "grad_norm": 296.997314453125, + "learning_rate": 6.814e-06, + "loss": 22.5021, + "step": 34070 + }, + { + "epoch": 0.06884375618644376, + "grad_norm": 603.8565673828125, + "learning_rate": 6.8160000000000005e-06, + "loss": 34.0662, + "step": 34080 + }, + { + "epoch": 0.06886395681912758, + "grad_norm": 265.9648132324219, + "learning_rate": 6.818e-06, + "loss": 24.5765, + "step": 34090 + }, + { + "epoch": 0.0688841574518114, + "grad_norm": 60.6540641784668, + "learning_rate": 6.820000000000001e-06, + "loss": 43.553, + "step": 34100 + }, + { + "epoch": 0.0689043580844952, + "grad_norm": 162.8219451904297, + "learning_rate": 6.822000000000001e-06, + "loss": 26.4204, + "step": 34110 + }, + { + "epoch": 0.06892455871717902, + "grad_norm": 327.31219482421875, + "learning_rate": 6.824e-06, + "loss": 24.6038, + "step": 34120 + }, + { + "epoch": 0.06894475934986284, + "grad_norm": 298.9954528808594, + "learning_rate": 6.826e-06, + "loss": 15.5414, + "step": 34130 + }, + { + "epoch": 0.06896495998254665, + "grad_norm": 378.151123046875, + "learning_rate": 6.8280000000000005e-06, + "loss": 35.4214, + "step": 34140 + }, + { + "epoch": 0.06898516061523047, + "grad_norm": 407.6482849121094, + "learning_rate": 6.830000000000001e-06, + "loss": 16.3223, + "step": 34150 + }, + { + "epoch": 0.06900536124791429, + "grad_norm": 313.89410400390625, + "learning_rate": 6.832000000000001e-06, + "loss": 19.986, + "step": 34160 + }, + { + "epoch": 0.0690255618805981, + "grad_norm": 450.92047119140625, + "learning_rate": 6.834000000000001e-06, + "loss": 23.1717, + "step": 34170 + }, + { + "epoch": 0.06904576251328191, + "grad_norm": 367.4051208496094, + "learning_rate": 6.836e-06, + "loss": 18.2367, + "step": 34180 + }, + { + "epoch": 0.06906596314596573, + "grad_norm": 200.5437469482422, + "learning_rate": 6.8380000000000004e-06, + "loss": 27.8734, + "step": 34190 + }, + { + "epoch": 0.06908616377864955, + "grad_norm": 167.73825073242188, + "learning_rate": 6.8400000000000014e-06, + "loss": 37.5091, + "step": 34200 + }, + { + "epoch": 0.06910636441133336, + "grad_norm": 175.97230529785156, + "learning_rate": 6.842000000000001e-06, + "loss": 37.9024, + "step": 34210 + }, + { + "epoch": 0.06912656504401718, + "grad_norm": 276.8113708496094, + "learning_rate": 6.844000000000001e-06, + "loss": 30.3383, + "step": 34220 + }, + { + "epoch": 0.069146765676701, + "grad_norm": 413.5763854980469, + "learning_rate": 6.846e-06, + "loss": 22.2407, + "step": 34230 + }, + { + "epoch": 0.0691669663093848, + "grad_norm": 54.51414108276367, + "learning_rate": 6.848e-06, + "loss": 20.599, + "step": 34240 + }, + { + "epoch": 0.06918716694206863, + "grad_norm": 547.967041015625, + "learning_rate": 6.850000000000001e-06, + "loss": 30.5517, + "step": 34250 + }, + { + "epoch": 0.06920736757475245, + "grad_norm": 153.64317321777344, + "learning_rate": 6.852000000000001e-06, + "loss": 29.968, + "step": 34260 + }, + { + "epoch": 0.06922756820743625, + "grad_norm": 510.6077575683594, + "learning_rate": 6.854000000000001e-06, + "loss": 30.9965, + "step": 34270 + }, + { + "epoch": 0.06924776884012007, + "grad_norm": 513.1456298828125, + "learning_rate": 6.856e-06, + "loss": 28.7472, + "step": 34280 + }, + { + "epoch": 0.06926796947280389, + "grad_norm": 260.1783447265625, + "learning_rate": 6.858e-06, + "loss": 35.7646, + "step": 34290 + }, + { + "epoch": 0.0692881701054877, + "grad_norm": 1510.2899169921875, + "learning_rate": 6.860000000000001e-06, + "loss": 25.1878, + "step": 34300 + }, + { + "epoch": 0.06930837073817152, + "grad_norm": 482.1159362792969, + "learning_rate": 6.8620000000000005e-06, + "loss": 26.38, + "step": 34310 + }, + { + "epoch": 0.06932857137085534, + "grad_norm": 530.0428466796875, + "learning_rate": 6.864000000000001e-06, + "loss": 25.9121, + "step": 34320 + }, + { + "epoch": 0.06934877200353914, + "grad_norm": 584.091064453125, + "learning_rate": 6.866000000000001e-06, + "loss": 49.2923, + "step": 34330 + }, + { + "epoch": 0.06936897263622296, + "grad_norm": 301.7872009277344, + "learning_rate": 6.868e-06, + "loss": 17.6216, + "step": 34340 + }, + { + "epoch": 0.06938917326890678, + "grad_norm": 342.6329040527344, + "learning_rate": 6.870000000000001e-06, + "loss": 21.9526, + "step": 34350 + }, + { + "epoch": 0.0694093739015906, + "grad_norm": 1223.290283203125, + "learning_rate": 6.872000000000001e-06, + "loss": 43.0305, + "step": 34360 + }, + { + "epoch": 0.06942957453427441, + "grad_norm": 513.8219604492188, + "learning_rate": 6.874000000000001e-06, + "loss": 39.9957, + "step": 34370 + }, + { + "epoch": 0.06944977516695823, + "grad_norm": 573.5376586914062, + "learning_rate": 6.876000000000001e-06, + "loss": 35.5843, + "step": 34380 + }, + { + "epoch": 0.06946997579964205, + "grad_norm": 321.61480712890625, + "learning_rate": 6.878e-06, + "loss": 33.41, + "step": 34390 + }, + { + "epoch": 0.06949017643232586, + "grad_norm": 319.8245544433594, + "learning_rate": 6.88e-06, + "loss": 16.906, + "step": 34400 + }, + { + "epoch": 0.06951037706500968, + "grad_norm": 409.17474365234375, + "learning_rate": 6.882000000000001e-06, + "loss": 31.1368, + "step": 34410 + }, + { + "epoch": 0.0695305776976935, + "grad_norm": 329.41058349609375, + "learning_rate": 6.8840000000000005e-06, + "loss": 18.909, + "step": 34420 + }, + { + "epoch": 0.0695507783303773, + "grad_norm": 87.27323913574219, + "learning_rate": 6.886000000000001e-06, + "loss": 26.7308, + "step": 34430 + }, + { + "epoch": 0.06957097896306112, + "grad_norm": 313.5718994140625, + "learning_rate": 6.888e-06, + "loss": 42.2084, + "step": 34440 + }, + { + "epoch": 0.06959117959574494, + "grad_norm": 469.9633483886719, + "learning_rate": 6.89e-06, + "loss": 35.2144, + "step": 34450 + }, + { + "epoch": 0.06961138022842875, + "grad_norm": 277.5743713378906, + "learning_rate": 6.892000000000001e-06, + "loss": 26.8681, + "step": 34460 + }, + { + "epoch": 0.06963158086111257, + "grad_norm": 663.92041015625, + "learning_rate": 6.894e-06, + "loss": 28.4116, + "step": 34470 + }, + { + "epoch": 0.06965178149379639, + "grad_norm": 149.6378173828125, + "learning_rate": 6.8960000000000006e-06, + "loss": 15.5917, + "step": 34480 + }, + { + "epoch": 0.0696719821264802, + "grad_norm": 220.55062866210938, + "learning_rate": 6.898e-06, + "loss": 37.5737, + "step": 34490 + }, + { + "epoch": 0.06969218275916401, + "grad_norm": 89.65522766113281, + "learning_rate": 6.9e-06, + "loss": 21.6902, + "step": 34500 + }, + { + "epoch": 0.06971238339184783, + "grad_norm": 479.03692626953125, + "learning_rate": 6.902000000000001e-06, + "loss": 15.2559, + "step": 34510 + }, + { + "epoch": 0.06973258402453165, + "grad_norm": 141.52882385253906, + "learning_rate": 6.904e-06, + "loss": 14.7233, + "step": 34520 + }, + { + "epoch": 0.06975278465721546, + "grad_norm": 237.5696258544922, + "learning_rate": 6.9060000000000005e-06, + "loss": 22.7162, + "step": 34530 + }, + { + "epoch": 0.06977298528989928, + "grad_norm": 185.33267211914062, + "learning_rate": 6.908000000000001e-06, + "loss": 27.6367, + "step": 34540 + }, + { + "epoch": 0.0697931859225831, + "grad_norm": 603.0072021484375, + "learning_rate": 6.91e-06, + "loss": 28.158, + "step": 34550 + }, + { + "epoch": 0.06981338655526691, + "grad_norm": 386.4322814941406, + "learning_rate": 6.912000000000001e-06, + "loss": 50.5451, + "step": 34560 + }, + { + "epoch": 0.06983358718795073, + "grad_norm": 209.9215850830078, + "learning_rate": 6.914000000000001e-06, + "loss": 44.3601, + "step": 34570 + }, + { + "epoch": 0.06985378782063455, + "grad_norm": 481.27508544921875, + "learning_rate": 6.916e-06, + "loss": 26.0773, + "step": 34580 + }, + { + "epoch": 0.06987398845331835, + "grad_norm": 579.3642578125, + "learning_rate": 6.9180000000000005e-06, + "loss": 22.9499, + "step": 34590 + }, + { + "epoch": 0.06989418908600217, + "grad_norm": 461.774169921875, + "learning_rate": 6.92e-06, + "loss": 29.6425, + "step": 34600 + }, + { + "epoch": 0.06991438971868599, + "grad_norm": 297.2777099609375, + "learning_rate": 6.922000000000001e-06, + "loss": 26.7038, + "step": 34610 + }, + { + "epoch": 0.0699345903513698, + "grad_norm": 370.90728759765625, + "learning_rate": 6.924000000000001e-06, + "loss": 34.0258, + "step": 34620 + }, + { + "epoch": 0.06995479098405362, + "grad_norm": 438.7866516113281, + "learning_rate": 6.926e-06, + "loss": 31.7307, + "step": 34630 + }, + { + "epoch": 0.06997499161673744, + "grad_norm": 42.228172302246094, + "learning_rate": 6.928e-06, + "loss": 29.1728, + "step": 34640 + }, + { + "epoch": 0.06999519224942125, + "grad_norm": 156.34921264648438, + "learning_rate": 6.93e-06, + "loss": 31.3085, + "step": 34650 + }, + { + "epoch": 0.07001539288210507, + "grad_norm": 129.1876678466797, + "learning_rate": 6.932000000000001e-06, + "loss": 17.2462, + "step": 34660 + }, + { + "epoch": 0.07003559351478889, + "grad_norm": 183.5938262939453, + "learning_rate": 6.934000000000001e-06, + "loss": 28.9883, + "step": 34670 + }, + { + "epoch": 0.0700557941474727, + "grad_norm": 298.6161804199219, + "learning_rate": 6.936e-06, + "loss": 32.2716, + "step": 34680 + }, + { + "epoch": 0.07007599478015651, + "grad_norm": 276.34808349609375, + "learning_rate": 6.938e-06, + "loss": 32.5951, + "step": 34690 + }, + { + "epoch": 0.07009619541284033, + "grad_norm": 225.87469482421875, + "learning_rate": 6.9400000000000005e-06, + "loss": 30.0686, + "step": 34700 + }, + { + "epoch": 0.07011639604552415, + "grad_norm": 622.208984375, + "learning_rate": 6.942000000000001e-06, + "loss": 45.3452, + "step": 34710 + }, + { + "epoch": 0.07013659667820796, + "grad_norm": 329.3135681152344, + "learning_rate": 6.944000000000001e-06, + "loss": 18.9175, + "step": 34720 + }, + { + "epoch": 0.07015679731089178, + "grad_norm": 364.23193359375, + "learning_rate": 6.946000000000001e-06, + "loss": 39.9556, + "step": 34730 + }, + { + "epoch": 0.0701769979435756, + "grad_norm": 692.3369750976562, + "learning_rate": 6.948e-06, + "loss": 48.0362, + "step": 34740 + }, + { + "epoch": 0.0701971985762594, + "grad_norm": 507.91473388671875, + "learning_rate": 6.95e-06, + "loss": 38.6651, + "step": 34750 + }, + { + "epoch": 0.07021739920894322, + "grad_norm": 513.9027709960938, + "learning_rate": 6.952000000000001e-06, + "loss": 38.2413, + "step": 34760 + }, + { + "epoch": 0.07023759984162704, + "grad_norm": 298.8250427246094, + "learning_rate": 6.954000000000001e-06, + "loss": 23.3547, + "step": 34770 + }, + { + "epoch": 0.07025780047431085, + "grad_norm": 152.75511169433594, + "learning_rate": 6.956000000000001e-06, + "loss": 28.2933, + "step": 34780 + }, + { + "epoch": 0.07027800110699467, + "grad_norm": 220.9697265625, + "learning_rate": 6.958e-06, + "loss": 17.977, + "step": 34790 + }, + { + "epoch": 0.07029820173967849, + "grad_norm": 122.43717956542969, + "learning_rate": 6.96e-06, + "loss": 16.0281, + "step": 34800 + }, + { + "epoch": 0.0703184023723623, + "grad_norm": 57.243896484375, + "learning_rate": 6.962000000000001e-06, + "loss": 55.2343, + "step": 34810 + }, + { + "epoch": 0.07033860300504612, + "grad_norm": 39.22971725463867, + "learning_rate": 6.964000000000001e-06, + "loss": 34.3773, + "step": 34820 + }, + { + "epoch": 0.07035880363772994, + "grad_norm": 378.3753662109375, + "learning_rate": 6.966000000000001e-06, + "loss": 37.3846, + "step": 34830 + }, + { + "epoch": 0.07037900427041376, + "grad_norm": 426.9247131347656, + "learning_rate": 6.968e-06, + "loss": 30.9604, + "step": 34840 + }, + { + "epoch": 0.07039920490309756, + "grad_norm": 584.5142211914062, + "learning_rate": 6.97e-06, + "loss": 32.2506, + "step": 34850 + }, + { + "epoch": 0.07041940553578138, + "grad_norm": 357.4947509765625, + "learning_rate": 6.972000000000001e-06, + "loss": 22.4271, + "step": 34860 + }, + { + "epoch": 0.0704396061684652, + "grad_norm": 393.0754089355469, + "learning_rate": 6.9740000000000005e-06, + "loss": 20.3699, + "step": 34870 + }, + { + "epoch": 0.07045980680114901, + "grad_norm": 422.4024658203125, + "learning_rate": 6.976000000000001e-06, + "loss": 28.3317, + "step": 34880 + }, + { + "epoch": 0.07048000743383283, + "grad_norm": 229.6375274658203, + "learning_rate": 6.978e-06, + "loss": 17.2899, + "step": 34890 + }, + { + "epoch": 0.07050020806651665, + "grad_norm": 212.41357421875, + "learning_rate": 6.98e-06, + "loss": 23.0419, + "step": 34900 + }, + { + "epoch": 0.07052040869920045, + "grad_norm": 297.3955993652344, + "learning_rate": 6.982000000000001e-06, + "loss": 37.1577, + "step": 34910 + }, + { + "epoch": 0.07054060933188427, + "grad_norm": 415.9945068359375, + "learning_rate": 6.984e-06, + "loss": 18.8616, + "step": 34920 + }, + { + "epoch": 0.0705608099645681, + "grad_norm": 282.5414123535156, + "learning_rate": 6.9860000000000005e-06, + "loss": 14.3868, + "step": 34930 + }, + { + "epoch": 0.0705810105972519, + "grad_norm": 990.1007690429688, + "learning_rate": 6.988000000000001e-06, + "loss": 58.3404, + "step": 34940 + }, + { + "epoch": 0.07060121122993572, + "grad_norm": 184.29627990722656, + "learning_rate": 6.99e-06, + "loss": 14.2599, + "step": 34950 + }, + { + "epoch": 0.07062141186261954, + "grad_norm": 319.30511474609375, + "learning_rate": 6.992000000000001e-06, + "loss": 40.0757, + "step": 34960 + }, + { + "epoch": 0.07064161249530335, + "grad_norm": 137.1524200439453, + "learning_rate": 6.994000000000001e-06, + "loss": 30.0043, + "step": 34970 + }, + { + "epoch": 0.07066181312798717, + "grad_norm": 83.04960632324219, + "learning_rate": 6.9960000000000004e-06, + "loss": 17.2658, + "step": 34980 + }, + { + "epoch": 0.07068201376067099, + "grad_norm": 161.24586486816406, + "learning_rate": 6.998000000000001e-06, + "loss": 32.1134, + "step": 34990 + }, + { + "epoch": 0.0707022143933548, + "grad_norm": 156.23355102539062, + "learning_rate": 7e-06, + "loss": 28.3132, + "step": 35000 + }, + { + "epoch": 0.07072241502603861, + "grad_norm": 406.73956298828125, + "learning_rate": 7.002000000000001e-06, + "loss": 31.5436, + "step": 35010 + }, + { + "epoch": 0.07074261565872243, + "grad_norm": 823.199462890625, + "learning_rate": 7.004000000000001e-06, + "loss": 44.0885, + "step": 35020 + }, + { + "epoch": 0.07076281629140625, + "grad_norm": 526.9590454101562, + "learning_rate": 7.006e-06, + "loss": 24.7382, + "step": 35030 + }, + { + "epoch": 0.07078301692409006, + "grad_norm": 58.91459655761719, + "learning_rate": 7.0080000000000005e-06, + "loss": 17.9061, + "step": 35040 + }, + { + "epoch": 0.07080321755677388, + "grad_norm": 776.4654541015625, + "learning_rate": 7.01e-06, + "loss": 34.614, + "step": 35050 + }, + { + "epoch": 0.0708234181894577, + "grad_norm": 507.73492431640625, + "learning_rate": 7.012000000000001e-06, + "loss": 21.6508, + "step": 35060 + }, + { + "epoch": 0.0708436188221415, + "grad_norm": 498.3752746582031, + "learning_rate": 7.014000000000001e-06, + "loss": 25.3131, + "step": 35070 + }, + { + "epoch": 0.07086381945482532, + "grad_norm": 183.4510955810547, + "learning_rate": 7.016e-06, + "loss": 25.0299, + "step": 35080 + }, + { + "epoch": 0.07088402008750914, + "grad_norm": 395.6617431640625, + "learning_rate": 7.018e-06, + "loss": 36.4378, + "step": 35090 + }, + { + "epoch": 0.07090422072019295, + "grad_norm": 446.00994873046875, + "learning_rate": 7.0200000000000006e-06, + "loss": 45.5589, + "step": 35100 + }, + { + "epoch": 0.07092442135287677, + "grad_norm": 208.86793518066406, + "learning_rate": 7.022000000000001e-06, + "loss": 31.6502, + "step": 35110 + }, + { + "epoch": 0.07094462198556059, + "grad_norm": 165.08340454101562, + "learning_rate": 7.024000000000001e-06, + "loss": 34.0186, + "step": 35120 + }, + { + "epoch": 0.0709648226182444, + "grad_norm": 166.9569549560547, + "learning_rate": 7.026000000000001e-06, + "loss": 19.1239, + "step": 35130 + }, + { + "epoch": 0.07098502325092822, + "grad_norm": 247.7082977294922, + "learning_rate": 7.028e-06, + "loss": 24.5042, + "step": 35140 + }, + { + "epoch": 0.07100522388361204, + "grad_norm": 296.1555480957031, + "learning_rate": 7.0300000000000005e-06, + "loss": 31.8144, + "step": 35150 + }, + { + "epoch": 0.07102542451629586, + "grad_norm": 271.96258544921875, + "learning_rate": 7.0320000000000015e-06, + "loss": 29.6839, + "step": 35160 + }, + { + "epoch": 0.07104562514897966, + "grad_norm": 373.5399169921875, + "learning_rate": 7.034000000000001e-06, + "loss": 17.8588, + "step": 35170 + }, + { + "epoch": 0.07106582578166348, + "grad_norm": 143.0919952392578, + "learning_rate": 7.036000000000001e-06, + "loss": 38.991, + "step": 35180 + }, + { + "epoch": 0.0710860264143473, + "grad_norm": 365.6285400390625, + "learning_rate": 7.038e-06, + "loss": 36.4588, + "step": 35190 + }, + { + "epoch": 0.07110622704703111, + "grad_norm": 227.5874786376953, + "learning_rate": 7.04e-06, + "loss": 43.24, + "step": 35200 + }, + { + "epoch": 0.07112642767971493, + "grad_norm": 172.9547119140625, + "learning_rate": 7.042000000000001e-06, + "loss": 31.8345, + "step": 35210 + }, + { + "epoch": 0.07114662831239875, + "grad_norm": 235.5738067626953, + "learning_rate": 7.044000000000001e-06, + "loss": 26.454, + "step": 35220 + }, + { + "epoch": 0.07116682894508256, + "grad_norm": 1110.907958984375, + "learning_rate": 7.046000000000001e-06, + "loss": 34.7562, + "step": 35230 + }, + { + "epoch": 0.07118702957776638, + "grad_norm": 0.0, + "learning_rate": 7.048e-06, + "loss": 37.1723, + "step": 35240 + }, + { + "epoch": 0.0712072302104502, + "grad_norm": 69.67388153076172, + "learning_rate": 7.05e-06, + "loss": 30.0476, + "step": 35250 + }, + { + "epoch": 0.071227430843134, + "grad_norm": 180.0526580810547, + "learning_rate": 7.052000000000001e-06, + "loss": 20.4527, + "step": 35260 + }, + { + "epoch": 0.07124763147581782, + "grad_norm": 189.87799072265625, + "learning_rate": 7.0540000000000006e-06, + "loss": 27.1138, + "step": 35270 + }, + { + "epoch": 0.07126783210850164, + "grad_norm": 117.58113861083984, + "learning_rate": 7.056000000000001e-06, + "loss": 17.698, + "step": 35280 + }, + { + "epoch": 0.07128803274118545, + "grad_norm": 586.3466796875, + "learning_rate": 7.058e-06, + "loss": 31.3433, + "step": 35290 + }, + { + "epoch": 0.07130823337386927, + "grad_norm": 226.22317504882812, + "learning_rate": 7.06e-06, + "loss": 34.7721, + "step": 35300 + }, + { + "epoch": 0.07132843400655309, + "grad_norm": 290.9054260253906, + "learning_rate": 7.062000000000001e-06, + "loss": 19.6257, + "step": 35310 + }, + { + "epoch": 0.07134863463923691, + "grad_norm": 183.03421020507812, + "learning_rate": 7.0640000000000005e-06, + "loss": 33.1939, + "step": 35320 + }, + { + "epoch": 0.07136883527192071, + "grad_norm": 909.1346435546875, + "learning_rate": 7.066000000000001e-06, + "loss": 34.783, + "step": 35330 + }, + { + "epoch": 0.07138903590460453, + "grad_norm": 719.4769287109375, + "learning_rate": 7.068000000000001e-06, + "loss": 29.6891, + "step": 35340 + }, + { + "epoch": 0.07140923653728835, + "grad_norm": 197.17449951171875, + "learning_rate": 7.07e-06, + "loss": 63.2718, + "step": 35350 + }, + { + "epoch": 0.07142943716997216, + "grad_norm": 511.7129821777344, + "learning_rate": 7.072000000000001e-06, + "loss": 24.5842, + "step": 35360 + }, + { + "epoch": 0.07144963780265598, + "grad_norm": 406.55767822265625, + "learning_rate": 7.074000000000001e-06, + "loss": 31.0791, + "step": 35370 + }, + { + "epoch": 0.0714698384353398, + "grad_norm": 135.95860290527344, + "learning_rate": 7.0760000000000005e-06, + "loss": 33.5035, + "step": 35380 + }, + { + "epoch": 0.0714900390680236, + "grad_norm": 218.39500427246094, + "learning_rate": 7.078000000000001e-06, + "loss": 28.668, + "step": 35390 + }, + { + "epoch": 0.07151023970070743, + "grad_norm": 411.2699890136719, + "learning_rate": 7.08e-06, + "loss": 26.1219, + "step": 35400 + }, + { + "epoch": 0.07153044033339125, + "grad_norm": 566.1672973632812, + "learning_rate": 7.082000000000001e-06, + "loss": 33.2163, + "step": 35410 + }, + { + "epoch": 0.07155064096607505, + "grad_norm": 140.92298889160156, + "learning_rate": 7.084000000000001e-06, + "loss": 19.9689, + "step": 35420 + }, + { + "epoch": 0.07157084159875887, + "grad_norm": 614.938232421875, + "learning_rate": 7.0860000000000004e-06, + "loss": 26.4136, + "step": 35430 + }, + { + "epoch": 0.07159104223144269, + "grad_norm": 447.7917785644531, + "learning_rate": 7.088000000000001e-06, + "loss": 21.5825, + "step": 35440 + }, + { + "epoch": 0.0716112428641265, + "grad_norm": 164.03590393066406, + "learning_rate": 7.09e-06, + "loss": 26.8849, + "step": 35450 + }, + { + "epoch": 0.07163144349681032, + "grad_norm": 349.9858703613281, + "learning_rate": 7.092000000000001e-06, + "loss": 41.2733, + "step": 35460 + }, + { + "epoch": 0.07165164412949414, + "grad_norm": 274.734375, + "learning_rate": 7.094000000000001e-06, + "loss": 28.6498, + "step": 35470 + }, + { + "epoch": 0.07167184476217796, + "grad_norm": 225.19076538085938, + "learning_rate": 7.096e-06, + "loss": 18.517, + "step": 35480 + }, + { + "epoch": 0.07169204539486176, + "grad_norm": 450.4459533691406, + "learning_rate": 7.0980000000000005e-06, + "loss": 17.9318, + "step": 35490 + }, + { + "epoch": 0.07171224602754558, + "grad_norm": 401.98333740234375, + "learning_rate": 7.100000000000001e-06, + "loss": 21.0995, + "step": 35500 + }, + { + "epoch": 0.0717324466602294, + "grad_norm": 110.38678741455078, + "learning_rate": 7.102000000000001e-06, + "loss": 15.5251, + "step": 35510 + }, + { + "epoch": 0.07175264729291321, + "grad_norm": 263.2588195800781, + "learning_rate": 7.104000000000001e-06, + "loss": 26.2152, + "step": 35520 + }, + { + "epoch": 0.07177284792559703, + "grad_norm": 483.0096130371094, + "learning_rate": 7.106000000000001e-06, + "loss": 26.1094, + "step": 35530 + }, + { + "epoch": 0.07179304855828085, + "grad_norm": 361.9169921875, + "learning_rate": 7.108e-06, + "loss": 30.8617, + "step": 35540 + }, + { + "epoch": 0.07181324919096466, + "grad_norm": 82.24534606933594, + "learning_rate": 7.1100000000000005e-06, + "loss": 40.0268, + "step": 35550 + }, + { + "epoch": 0.07183344982364848, + "grad_norm": 210.82386779785156, + "learning_rate": 7.1120000000000015e-06, + "loss": 28.475, + "step": 35560 + }, + { + "epoch": 0.0718536504563323, + "grad_norm": 164.13160705566406, + "learning_rate": 7.114000000000001e-06, + "loss": 11.687, + "step": 35570 + }, + { + "epoch": 0.0718738510890161, + "grad_norm": 162.2367401123047, + "learning_rate": 7.116000000000001e-06, + "loss": 27.1376, + "step": 35580 + }, + { + "epoch": 0.07189405172169992, + "grad_norm": 437.4132995605469, + "learning_rate": 7.118e-06, + "loss": 34.7301, + "step": 35590 + }, + { + "epoch": 0.07191425235438374, + "grad_norm": 242.47047424316406, + "learning_rate": 7.1200000000000004e-06, + "loss": 11.6893, + "step": 35600 + }, + { + "epoch": 0.07193445298706755, + "grad_norm": 160.79574584960938, + "learning_rate": 7.1220000000000014e-06, + "loss": 15.1122, + "step": 35610 + }, + { + "epoch": 0.07195465361975137, + "grad_norm": 371.9103698730469, + "learning_rate": 7.124000000000001e-06, + "loss": 19.397, + "step": 35620 + }, + { + "epoch": 0.07197485425243519, + "grad_norm": 465.20501708984375, + "learning_rate": 7.126000000000001e-06, + "loss": 25.835, + "step": 35630 + }, + { + "epoch": 0.07199505488511901, + "grad_norm": 170.93829345703125, + "learning_rate": 7.128e-06, + "loss": 26.0983, + "step": 35640 + }, + { + "epoch": 0.07201525551780281, + "grad_norm": 394.7083435058594, + "learning_rate": 7.13e-06, + "loss": 23.7062, + "step": 35650 + }, + { + "epoch": 0.07203545615048663, + "grad_norm": 258.1009826660156, + "learning_rate": 7.132e-06, + "loss": 14.3192, + "step": 35660 + }, + { + "epoch": 0.07205565678317045, + "grad_norm": 246.60574340820312, + "learning_rate": 7.134000000000001e-06, + "loss": 21.8068, + "step": 35670 + }, + { + "epoch": 0.07207585741585426, + "grad_norm": 361.5950622558594, + "learning_rate": 7.136000000000001e-06, + "loss": 36.3729, + "step": 35680 + }, + { + "epoch": 0.07209605804853808, + "grad_norm": 130.7515411376953, + "learning_rate": 7.138e-06, + "loss": 30.8072, + "step": 35690 + }, + { + "epoch": 0.0721162586812219, + "grad_norm": 558.0682983398438, + "learning_rate": 7.14e-06, + "loss": 39.1854, + "step": 35700 + }, + { + "epoch": 0.07213645931390571, + "grad_norm": 71.65023040771484, + "learning_rate": 7.142e-06, + "loss": 27.4871, + "step": 35710 + }, + { + "epoch": 0.07215665994658953, + "grad_norm": 288.7943115234375, + "learning_rate": 7.1440000000000005e-06, + "loss": 14.6281, + "step": 35720 + }, + { + "epoch": 0.07217686057927335, + "grad_norm": 246.38893127441406, + "learning_rate": 7.146000000000001e-06, + "loss": 22.2566, + "step": 35730 + }, + { + "epoch": 0.07219706121195715, + "grad_norm": 134.45681762695312, + "learning_rate": 7.148000000000001e-06, + "loss": 22.4863, + "step": 35740 + }, + { + "epoch": 0.07221726184464097, + "grad_norm": 59.040748596191406, + "learning_rate": 7.15e-06, + "loss": 40.3208, + "step": 35750 + }, + { + "epoch": 0.0722374624773248, + "grad_norm": 475.1350402832031, + "learning_rate": 7.152e-06, + "loss": 26.8597, + "step": 35760 + }, + { + "epoch": 0.0722576631100086, + "grad_norm": 172.88192749023438, + "learning_rate": 7.154000000000001e-06, + "loss": 32.3942, + "step": 35770 + }, + { + "epoch": 0.07227786374269242, + "grad_norm": 478.24957275390625, + "learning_rate": 7.156000000000001e-06, + "loss": 36.7713, + "step": 35780 + }, + { + "epoch": 0.07229806437537624, + "grad_norm": 255.5506591796875, + "learning_rate": 7.158000000000001e-06, + "loss": 22.5891, + "step": 35790 + }, + { + "epoch": 0.07231826500806006, + "grad_norm": 439.4625549316406, + "learning_rate": 7.16e-06, + "loss": 37.9358, + "step": 35800 + }, + { + "epoch": 0.07233846564074387, + "grad_norm": 282.32830810546875, + "learning_rate": 7.162e-06, + "loss": 17.1687, + "step": 35810 + }, + { + "epoch": 0.07235866627342769, + "grad_norm": 421.70745849609375, + "learning_rate": 7.164000000000001e-06, + "loss": 37.5946, + "step": 35820 + }, + { + "epoch": 0.0723788669061115, + "grad_norm": 313.3034973144531, + "learning_rate": 7.1660000000000005e-06, + "loss": 17.9735, + "step": 35830 + }, + { + "epoch": 0.07239906753879531, + "grad_norm": 397.9144592285156, + "learning_rate": 7.168000000000001e-06, + "loss": 20.7027, + "step": 35840 + }, + { + "epoch": 0.07241926817147913, + "grad_norm": 290.4940490722656, + "learning_rate": 7.17e-06, + "loss": 27.6429, + "step": 35850 + }, + { + "epoch": 0.07243946880416295, + "grad_norm": 176.52755737304688, + "learning_rate": 7.172e-06, + "loss": 21.9893, + "step": 35860 + }, + { + "epoch": 0.07245966943684676, + "grad_norm": 411.03887939453125, + "learning_rate": 7.174000000000001e-06, + "loss": 35.1925, + "step": 35870 + }, + { + "epoch": 0.07247987006953058, + "grad_norm": 0.0, + "learning_rate": 7.176e-06, + "loss": 24.5947, + "step": 35880 + }, + { + "epoch": 0.0725000707022144, + "grad_norm": 161.65664672851562, + "learning_rate": 7.1780000000000006e-06, + "loss": 16.1081, + "step": 35890 + }, + { + "epoch": 0.0725202713348982, + "grad_norm": 436.17327880859375, + "learning_rate": 7.180000000000001e-06, + "loss": 39.479, + "step": 35900 + }, + { + "epoch": 0.07254047196758202, + "grad_norm": 475.31060791015625, + "learning_rate": 7.182e-06, + "loss": 25.5334, + "step": 35910 + }, + { + "epoch": 0.07256067260026584, + "grad_norm": 437.42547607421875, + "learning_rate": 7.184000000000001e-06, + "loss": 40.1187, + "step": 35920 + }, + { + "epoch": 0.07258087323294965, + "grad_norm": 106.16314697265625, + "learning_rate": 7.186000000000001e-06, + "loss": 34.4375, + "step": 35930 + }, + { + "epoch": 0.07260107386563347, + "grad_norm": 118.01982879638672, + "learning_rate": 7.1880000000000005e-06, + "loss": 24.4265, + "step": 35940 + }, + { + "epoch": 0.07262127449831729, + "grad_norm": 371.4693908691406, + "learning_rate": 7.190000000000001e-06, + "loss": 17.0825, + "step": 35950 + }, + { + "epoch": 0.07264147513100111, + "grad_norm": 385.18011474609375, + "learning_rate": 7.192e-06, + "loss": 26.4692, + "step": 35960 + }, + { + "epoch": 0.07266167576368492, + "grad_norm": 134.06768798828125, + "learning_rate": 7.194000000000001e-06, + "loss": 19.1417, + "step": 35970 + }, + { + "epoch": 0.07268187639636874, + "grad_norm": 271.87060546875, + "learning_rate": 7.196000000000001e-06, + "loss": 32.2219, + "step": 35980 + }, + { + "epoch": 0.07270207702905256, + "grad_norm": 228.697265625, + "learning_rate": 7.198e-06, + "loss": 15.1079, + "step": 35990 + }, + { + "epoch": 0.07272227766173636, + "grad_norm": 830.1871948242188, + "learning_rate": 7.2000000000000005e-06, + "loss": 32.8203, + "step": 36000 + }, + { + "epoch": 0.07274247829442018, + "grad_norm": 510.1455993652344, + "learning_rate": 7.202e-06, + "loss": 38.1677, + "step": 36010 + }, + { + "epoch": 0.072762678927104, + "grad_norm": 582.5565185546875, + "learning_rate": 7.204000000000001e-06, + "loss": 25.6832, + "step": 36020 + }, + { + "epoch": 0.07278287955978781, + "grad_norm": 218.80935668945312, + "learning_rate": 7.206000000000001e-06, + "loss": 27.2106, + "step": 36030 + }, + { + "epoch": 0.07280308019247163, + "grad_norm": 294.893310546875, + "learning_rate": 7.208e-06, + "loss": 21.015, + "step": 36040 + }, + { + "epoch": 0.07282328082515545, + "grad_norm": 334.14752197265625, + "learning_rate": 7.2100000000000004e-06, + "loss": 18.0615, + "step": 36050 + }, + { + "epoch": 0.07284348145783925, + "grad_norm": 288.8575134277344, + "learning_rate": 7.212e-06, + "loss": 22.7721, + "step": 36060 + }, + { + "epoch": 0.07286368209052307, + "grad_norm": 358.3299255371094, + "learning_rate": 7.214000000000001e-06, + "loss": 21.3283, + "step": 36070 + }, + { + "epoch": 0.0728838827232069, + "grad_norm": 315.20782470703125, + "learning_rate": 7.216000000000001e-06, + "loss": 15.5972, + "step": 36080 + }, + { + "epoch": 0.0729040833558907, + "grad_norm": 361.19549560546875, + "learning_rate": 7.218e-06, + "loss": 25.5272, + "step": 36090 + }, + { + "epoch": 0.07292428398857452, + "grad_norm": 177.4301300048828, + "learning_rate": 7.22e-06, + "loss": 34.7283, + "step": 36100 + }, + { + "epoch": 0.07294448462125834, + "grad_norm": 199.40557861328125, + "learning_rate": 7.2220000000000005e-06, + "loss": 18.4738, + "step": 36110 + }, + { + "epoch": 0.07296468525394216, + "grad_norm": 711.8712768554688, + "learning_rate": 7.224000000000001e-06, + "loss": 38.1349, + "step": 36120 + }, + { + "epoch": 0.07298488588662597, + "grad_norm": 207.1460723876953, + "learning_rate": 7.226000000000001e-06, + "loss": 14.6274, + "step": 36130 + }, + { + "epoch": 0.07300508651930979, + "grad_norm": 218.71484375, + "learning_rate": 7.228000000000001e-06, + "loss": 22.8234, + "step": 36140 + }, + { + "epoch": 0.0730252871519936, + "grad_norm": 232.43943786621094, + "learning_rate": 7.23e-06, + "loss": 23.2104, + "step": 36150 + }, + { + "epoch": 0.07304548778467741, + "grad_norm": 500.0850830078125, + "learning_rate": 7.232e-06, + "loss": 18.8728, + "step": 36160 + }, + { + "epoch": 0.07306568841736123, + "grad_norm": 403.644775390625, + "learning_rate": 7.234000000000001e-06, + "loss": 25.2967, + "step": 36170 + }, + { + "epoch": 0.07308588905004505, + "grad_norm": 321.7872619628906, + "learning_rate": 7.236000000000001e-06, + "loss": 33.8395, + "step": 36180 + }, + { + "epoch": 0.07310608968272886, + "grad_norm": 103.05240631103516, + "learning_rate": 7.238000000000001e-06, + "loss": 21.2051, + "step": 36190 + }, + { + "epoch": 0.07312629031541268, + "grad_norm": 210.76727294921875, + "learning_rate": 7.24e-06, + "loss": 13.8069, + "step": 36200 + }, + { + "epoch": 0.0731464909480965, + "grad_norm": 250.00503540039062, + "learning_rate": 7.242e-06, + "loss": 27.6499, + "step": 36210 + }, + { + "epoch": 0.0731666915807803, + "grad_norm": 211.04156494140625, + "learning_rate": 7.244000000000001e-06, + "loss": 25.0457, + "step": 36220 + }, + { + "epoch": 0.07318689221346412, + "grad_norm": 104.9522476196289, + "learning_rate": 7.246000000000001e-06, + "loss": 30.3264, + "step": 36230 + }, + { + "epoch": 0.07320709284614794, + "grad_norm": 280.8627014160156, + "learning_rate": 7.248000000000001e-06, + "loss": 17.7601, + "step": 36240 + }, + { + "epoch": 0.07322729347883175, + "grad_norm": 923.1336059570312, + "learning_rate": 7.25e-06, + "loss": 23.2485, + "step": 36250 + }, + { + "epoch": 0.07324749411151557, + "grad_norm": 609.1475219726562, + "learning_rate": 7.252e-06, + "loss": 41.1142, + "step": 36260 + }, + { + "epoch": 0.07326769474419939, + "grad_norm": 644.6527099609375, + "learning_rate": 7.254000000000001e-06, + "loss": 25.7066, + "step": 36270 + }, + { + "epoch": 0.07328789537688321, + "grad_norm": 189.8779296875, + "learning_rate": 7.2560000000000005e-06, + "loss": 27.0845, + "step": 36280 + }, + { + "epoch": 0.07330809600956702, + "grad_norm": 502.730712890625, + "learning_rate": 7.258000000000001e-06, + "loss": 25.2417, + "step": 36290 + }, + { + "epoch": 0.07332829664225084, + "grad_norm": 344.7130126953125, + "learning_rate": 7.260000000000001e-06, + "loss": 34.3557, + "step": 36300 + }, + { + "epoch": 0.07334849727493466, + "grad_norm": 240.31741333007812, + "learning_rate": 7.262e-06, + "loss": 34.4685, + "step": 36310 + }, + { + "epoch": 0.07336869790761846, + "grad_norm": 356.3848876953125, + "learning_rate": 7.264000000000001e-06, + "loss": 21.1377, + "step": 36320 + }, + { + "epoch": 0.07338889854030228, + "grad_norm": 177.12232971191406, + "learning_rate": 7.266000000000001e-06, + "loss": 27.8209, + "step": 36330 + }, + { + "epoch": 0.0734090991729861, + "grad_norm": 492.5040588378906, + "learning_rate": 7.2680000000000005e-06, + "loss": 32.8956, + "step": 36340 + }, + { + "epoch": 0.07342929980566991, + "grad_norm": 448.3660583496094, + "learning_rate": 7.270000000000001e-06, + "loss": 24.272, + "step": 36350 + }, + { + "epoch": 0.07344950043835373, + "grad_norm": 336.73162841796875, + "learning_rate": 7.272e-06, + "loss": 26.5316, + "step": 36360 + }, + { + "epoch": 0.07346970107103755, + "grad_norm": 362.0407409667969, + "learning_rate": 7.274000000000001e-06, + "loss": 30.0662, + "step": 36370 + }, + { + "epoch": 0.07348990170372136, + "grad_norm": 234.1678009033203, + "learning_rate": 7.276000000000001e-06, + "loss": 25.709, + "step": 36380 + }, + { + "epoch": 0.07351010233640518, + "grad_norm": 317.2973937988281, + "learning_rate": 7.2780000000000005e-06, + "loss": 35.7133, + "step": 36390 + }, + { + "epoch": 0.073530302969089, + "grad_norm": 180.00048828125, + "learning_rate": 7.280000000000001e-06, + "loss": 18.6612, + "step": 36400 + }, + { + "epoch": 0.0735505036017728, + "grad_norm": 533.5408325195312, + "learning_rate": 7.282e-06, + "loss": 36.5794, + "step": 36410 + }, + { + "epoch": 0.07357070423445662, + "grad_norm": 228.9940643310547, + "learning_rate": 7.284000000000001e-06, + "loss": 28.2655, + "step": 36420 + }, + { + "epoch": 0.07359090486714044, + "grad_norm": 518.9951171875, + "learning_rate": 7.286000000000001e-06, + "loss": 32.3436, + "step": 36430 + }, + { + "epoch": 0.07361110549982426, + "grad_norm": 299.5155334472656, + "learning_rate": 7.288e-06, + "loss": 23.8606, + "step": 36440 + }, + { + "epoch": 0.07363130613250807, + "grad_norm": 226.43161010742188, + "learning_rate": 7.2900000000000005e-06, + "loss": 15.8627, + "step": 36450 + }, + { + "epoch": 0.07365150676519189, + "grad_norm": 645.637939453125, + "learning_rate": 7.292e-06, + "loss": 45.8218, + "step": 36460 + }, + { + "epoch": 0.07367170739787571, + "grad_norm": 293.798583984375, + "learning_rate": 7.294000000000001e-06, + "loss": 32.9752, + "step": 36470 + }, + { + "epoch": 0.07369190803055951, + "grad_norm": 520.4822998046875, + "learning_rate": 7.296000000000001e-06, + "loss": 22.4962, + "step": 36480 + }, + { + "epoch": 0.07371210866324333, + "grad_norm": 254.99899291992188, + "learning_rate": 7.298e-06, + "loss": 32.3449, + "step": 36490 + }, + { + "epoch": 0.07373230929592715, + "grad_norm": 393.9755554199219, + "learning_rate": 7.3e-06, + "loss": 24.0851, + "step": 36500 + }, + { + "epoch": 0.07375250992861096, + "grad_norm": 236.24923706054688, + "learning_rate": 7.3020000000000006e-06, + "loss": 21.1192, + "step": 36510 + }, + { + "epoch": 0.07377271056129478, + "grad_norm": 236.36317443847656, + "learning_rate": 7.304000000000001e-06, + "loss": 23.2971, + "step": 36520 + }, + { + "epoch": 0.0737929111939786, + "grad_norm": 440.5664367675781, + "learning_rate": 7.306000000000001e-06, + "loss": 25.8003, + "step": 36530 + }, + { + "epoch": 0.0738131118266624, + "grad_norm": 278.49822998046875, + "learning_rate": 7.308000000000001e-06, + "loss": 37.1441, + "step": 36540 + }, + { + "epoch": 0.07383331245934623, + "grad_norm": 51.29787826538086, + "learning_rate": 7.31e-06, + "loss": 30.1834, + "step": 36550 + }, + { + "epoch": 0.07385351309203005, + "grad_norm": 241.89549255371094, + "learning_rate": 7.3120000000000005e-06, + "loss": 13.0701, + "step": 36560 + }, + { + "epoch": 0.07387371372471385, + "grad_norm": 296.49591064453125, + "learning_rate": 7.3140000000000015e-06, + "loss": 49.7003, + "step": 36570 + }, + { + "epoch": 0.07389391435739767, + "grad_norm": 270.5970764160156, + "learning_rate": 7.316000000000001e-06, + "loss": 25.1562, + "step": 36580 + }, + { + "epoch": 0.07391411499008149, + "grad_norm": 243.77528381347656, + "learning_rate": 7.318000000000001e-06, + "loss": 29.228, + "step": 36590 + }, + { + "epoch": 0.0739343156227653, + "grad_norm": 1103.41943359375, + "learning_rate": 7.32e-06, + "loss": 59.5533, + "step": 36600 + }, + { + "epoch": 0.07395451625544912, + "grad_norm": 283.4581604003906, + "learning_rate": 7.322e-06, + "loss": 19.2951, + "step": 36610 + }, + { + "epoch": 0.07397471688813294, + "grad_norm": 365.1043395996094, + "learning_rate": 7.324000000000001e-06, + "loss": 40.0341, + "step": 36620 + }, + { + "epoch": 0.07399491752081676, + "grad_norm": 139.1345672607422, + "learning_rate": 7.326000000000001e-06, + "loss": 39.2212, + "step": 36630 + }, + { + "epoch": 0.07401511815350056, + "grad_norm": 112.60505676269531, + "learning_rate": 7.328000000000001e-06, + "loss": 22.9324, + "step": 36640 + }, + { + "epoch": 0.07403531878618438, + "grad_norm": 593.785400390625, + "learning_rate": 7.33e-06, + "loss": 34.1412, + "step": 36650 + }, + { + "epoch": 0.0740555194188682, + "grad_norm": 553.2237548828125, + "learning_rate": 7.332e-06, + "loss": 47.1545, + "step": 36660 + }, + { + "epoch": 0.07407572005155201, + "grad_norm": 208.7756805419922, + "learning_rate": 7.334000000000001e-06, + "loss": 21.578, + "step": 36670 + }, + { + "epoch": 0.07409592068423583, + "grad_norm": 476.149658203125, + "learning_rate": 7.3360000000000006e-06, + "loss": 37.0521, + "step": 36680 + }, + { + "epoch": 0.07411612131691965, + "grad_norm": 177.48573303222656, + "learning_rate": 7.338000000000001e-06, + "loss": 16.8563, + "step": 36690 + }, + { + "epoch": 0.07413632194960346, + "grad_norm": 308.2784118652344, + "learning_rate": 7.340000000000001e-06, + "loss": 17.1021, + "step": 36700 + }, + { + "epoch": 0.07415652258228728, + "grad_norm": 208.59652709960938, + "learning_rate": 7.342e-06, + "loss": 17.2225, + "step": 36710 + }, + { + "epoch": 0.0741767232149711, + "grad_norm": 55.97196578979492, + "learning_rate": 7.344000000000001e-06, + "loss": 29.164, + "step": 36720 + }, + { + "epoch": 0.0741969238476549, + "grad_norm": 121.7552490234375, + "learning_rate": 7.346000000000001e-06, + "loss": 40.1761, + "step": 36730 + }, + { + "epoch": 0.07421712448033872, + "grad_norm": 199.17617797851562, + "learning_rate": 7.348000000000001e-06, + "loss": 49.4342, + "step": 36740 + }, + { + "epoch": 0.07423732511302254, + "grad_norm": 212.97402954101562, + "learning_rate": 7.350000000000001e-06, + "loss": 25.9432, + "step": 36750 + }, + { + "epoch": 0.07425752574570635, + "grad_norm": 287.5586242675781, + "learning_rate": 7.352e-06, + "loss": 28.8848, + "step": 36760 + }, + { + "epoch": 0.07427772637839017, + "grad_norm": 570.6173095703125, + "learning_rate": 7.354000000000001e-06, + "loss": 19.9024, + "step": 36770 + }, + { + "epoch": 0.07429792701107399, + "grad_norm": 256.5933837890625, + "learning_rate": 7.356000000000001e-06, + "loss": 28.7767, + "step": 36780 + }, + { + "epoch": 0.07431812764375781, + "grad_norm": 204.2556610107422, + "learning_rate": 7.3580000000000005e-06, + "loss": 38.7115, + "step": 36790 + }, + { + "epoch": 0.07433832827644161, + "grad_norm": 113.79093933105469, + "learning_rate": 7.360000000000001e-06, + "loss": 17.7016, + "step": 36800 + }, + { + "epoch": 0.07435852890912543, + "grad_norm": 116.8794937133789, + "learning_rate": 7.362e-06, + "loss": 43.6426, + "step": 36810 + }, + { + "epoch": 0.07437872954180925, + "grad_norm": 284.8735656738281, + "learning_rate": 7.364000000000001e-06, + "loss": 47.9132, + "step": 36820 + }, + { + "epoch": 0.07439893017449306, + "grad_norm": 127.1036376953125, + "learning_rate": 7.366000000000001e-06, + "loss": 22.8843, + "step": 36830 + }, + { + "epoch": 0.07441913080717688, + "grad_norm": 182.51087951660156, + "learning_rate": 7.3680000000000004e-06, + "loss": 18.0095, + "step": 36840 + }, + { + "epoch": 0.0744393314398607, + "grad_norm": 461.25006103515625, + "learning_rate": 7.370000000000001e-06, + "loss": 27.5946, + "step": 36850 + }, + { + "epoch": 0.07445953207254451, + "grad_norm": 233.8284149169922, + "learning_rate": 7.372e-06, + "loss": 29.7888, + "step": 36860 + }, + { + "epoch": 0.07447973270522833, + "grad_norm": 129.74957275390625, + "learning_rate": 7.374000000000001e-06, + "loss": 9.5533, + "step": 36870 + }, + { + "epoch": 0.07449993333791215, + "grad_norm": 470.76922607421875, + "learning_rate": 7.376000000000001e-06, + "loss": 20.9399, + "step": 36880 + }, + { + "epoch": 0.07452013397059595, + "grad_norm": 285.8863220214844, + "learning_rate": 7.378e-06, + "loss": 19.7481, + "step": 36890 + }, + { + "epoch": 0.07454033460327977, + "grad_norm": 264.63531494140625, + "learning_rate": 7.3800000000000005e-06, + "loss": 25.8509, + "step": 36900 + }, + { + "epoch": 0.0745605352359636, + "grad_norm": 287.1709899902344, + "learning_rate": 7.382000000000001e-06, + "loss": 27.0948, + "step": 36910 + }, + { + "epoch": 0.0745807358686474, + "grad_norm": 441.5046081542969, + "learning_rate": 7.384e-06, + "loss": 34.6011, + "step": 36920 + }, + { + "epoch": 0.07460093650133122, + "grad_norm": 277.13397216796875, + "learning_rate": 7.386000000000001e-06, + "loss": 24.864, + "step": 36930 + }, + { + "epoch": 0.07462113713401504, + "grad_norm": 443.7454833984375, + "learning_rate": 7.388000000000001e-06, + "loss": 29.8873, + "step": 36940 + }, + { + "epoch": 0.07464133776669886, + "grad_norm": 242.94688415527344, + "learning_rate": 7.39e-06, + "loss": 31.2921, + "step": 36950 + }, + { + "epoch": 0.07466153839938267, + "grad_norm": 211.485595703125, + "learning_rate": 7.3920000000000005e-06, + "loss": 17.1606, + "step": 36960 + }, + { + "epoch": 0.07468173903206649, + "grad_norm": 212.78688049316406, + "learning_rate": 7.394e-06, + "loss": 23.3433, + "step": 36970 + }, + { + "epoch": 0.0747019396647503, + "grad_norm": 402.8143005371094, + "learning_rate": 7.396000000000001e-06, + "loss": 30.0493, + "step": 36980 + }, + { + "epoch": 0.07472214029743411, + "grad_norm": 73.41931915283203, + "learning_rate": 7.398000000000001e-06, + "loss": 20.3851, + "step": 36990 + }, + { + "epoch": 0.07474234093011793, + "grad_norm": 119.01841735839844, + "learning_rate": 7.4e-06, + "loss": 28.2371, + "step": 37000 + }, + { + "epoch": 0.07476254156280175, + "grad_norm": 245.3097686767578, + "learning_rate": 7.4020000000000005e-06, + "loss": 27.0031, + "step": 37010 + }, + { + "epoch": 0.07478274219548556, + "grad_norm": 382.00225830078125, + "learning_rate": 7.404e-06, + "loss": 21.0526, + "step": 37020 + }, + { + "epoch": 0.07480294282816938, + "grad_norm": 417.96441650390625, + "learning_rate": 7.406000000000001e-06, + "loss": 22.2644, + "step": 37030 + }, + { + "epoch": 0.0748231434608532, + "grad_norm": 262.6156921386719, + "learning_rate": 7.408000000000001e-06, + "loss": 21.3585, + "step": 37040 + }, + { + "epoch": 0.074843344093537, + "grad_norm": 147.9910125732422, + "learning_rate": 7.41e-06, + "loss": 31.688, + "step": 37050 + }, + { + "epoch": 0.07486354472622082, + "grad_norm": 330.61212158203125, + "learning_rate": 7.412e-06, + "loss": 15.5659, + "step": 37060 + }, + { + "epoch": 0.07488374535890464, + "grad_norm": 355.9070129394531, + "learning_rate": 7.4140000000000005e-06, + "loss": 31.2333, + "step": 37070 + }, + { + "epoch": 0.07490394599158845, + "grad_norm": 415.4013977050781, + "learning_rate": 7.416000000000001e-06, + "loss": 31.6887, + "step": 37080 + }, + { + "epoch": 0.07492414662427227, + "grad_norm": 215.07850646972656, + "learning_rate": 7.418000000000001e-06, + "loss": 31.2258, + "step": 37090 + }, + { + "epoch": 0.07494434725695609, + "grad_norm": 142.98353576660156, + "learning_rate": 7.420000000000001e-06, + "loss": 38.7785, + "step": 37100 + }, + { + "epoch": 0.07496454788963991, + "grad_norm": 105.02717590332031, + "learning_rate": 7.422e-06, + "loss": 23.1669, + "step": 37110 + }, + { + "epoch": 0.07498474852232372, + "grad_norm": 336.7762145996094, + "learning_rate": 7.424e-06, + "loss": 14.6662, + "step": 37120 + }, + { + "epoch": 0.07500494915500754, + "grad_norm": 409.864990234375, + "learning_rate": 7.426000000000001e-06, + "loss": 27.6743, + "step": 37130 + }, + { + "epoch": 0.07502514978769136, + "grad_norm": 268.7269287109375, + "learning_rate": 7.428000000000001e-06, + "loss": 24.6566, + "step": 37140 + }, + { + "epoch": 0.07504535042037516, + "grad_norm": 312.6243896484375, + "learning_rate": 7.430000000000001e-06, + "loss": 43.2453, + "step": 37150 + }, + { + "epoch": 0.07506555105305898, + "grad_norm": 260.17401123046875, + "learning_rate": 7.432e-06, + "loss": 20.4968, + "step": 37160 + }, + { + "epoch": 0.0750857516857428, + "grad_norm": 255.1354522705078, + "learning_rate": 7.434e-06, + "loss": 22.17, + "step": 37170 + }, + { + "epoch": 0.07510595231842661, + "grad_norm": 217.8333282470703, + "learning_rate": 7.436000000000001e-06, + "loss": 12.0146, + "step": 37180 + }, + { + "epoch": 0.07512615295111043, + "grad_norm": 790.7473754882812, + "learning_rate": 7.438000000000001e-06, + "loss": 53.3113, + "step": 37190 + }, + { + "epoch": 0.07514635358379425, + "grad_norm": 408.5172119140625, + "learning_rate": 7.440000000000001e-06, + "loss": 37.1765, + "step": 37200 + }, + { + "epoch": 0.07516655421647805, + "grad_norm": 571.9297485351562, + "learning_rate": 7.442e-06, + "loss": 42.8719, + "step": 37210 + }, + { + "epoch": 0.07518675484916187, + "grad_norm": 215.4970703125, + "learning_rate": 7.444e-06, + "loss": 19.6267, + "step": 37220 + }, + { + "epoch": 0.0752069554818457, + "grad_norm": 76.0966567993164, + "learning_rate": 7.446000000000001e-06, + "loss": 23.3036, + "step": 37230 + }, + { + "epoch": 0.0752271561145295, + "grad_norm": 254.29229736328125, + "learning_rate": 7.4480000000000005e-06, + "loss": 35.8291, + "step": 37240 + }, + { + "epoch": 0.07524735674721332, + "grad_norm": 223.7094268798828, + "learning_rate": 7.450000000000001e-06, + "loss": 23.6933, + "step": 37250 + }, + { + "epoch": 0.07526755737989714, + "grad_norm": 251.1807861328125, + "learning_rate": 7.452e-06, + "loss": 27.0036, + "step": 37260 + }, + { + "epoch": 0.07528775801258096, + "grad_norm": 342.2859802246094, + "learning_rate": 7.454e-06, + "loss": 27.8568, + "step": 37270 + }, + { + "epoch": 0.07530795864526477, + "grad_norm": 245.50550842285156, + "learning_rate": 7.456000000000001e-06, + "loss": 35.6972, + "step": 37280 + }, + { + "epoch": 0.07532815927794859, + "grad_norm": 215.06735229492188, + "learning_rate": 7.458e-06, + "loss": 16.0826, + "step": 37290 + }, + { + "epoch": 0.0753483599106324, + "grad_norm": 246.951904296875, + "learning_rate": 7.4600000000000006e-06, + "loss": 19.3, + "step": 37300 + }, + { + "epoch": 0.07536856054331621, + "grad_norm": 383.22308349609375, + "learning_rate": 7.462000000000001e-06, + "loss": 25.5818, + "step": 37310 + }, + { + "epoch": 0.07538876117600003, + "grad_norm": 252.7657470703125, + "learning_rate": 7.464e-06, + "loss": 36.939, + "step": 37320 + }, + { + "epoch": 0.07540896180868385, + "grad_norm": 163.82577514648438, + "learning_rate": 7.466000000000001e-06, + "loss": 29.758, + "step": 37330 + }, + { + "epoch": 0.07542916244136766, + "grad_norm": 460.1439514160156, + "learning_rate": 7.468000000000001e-06, + "loss": 33.5917, + "step": 37340 + }, + { + "epoch": 0.07544936307405148, + "grad_norm": 194.50894165039062, + "learning_rate": 7.4700000000000005e-06, + "loss": 27.0045, + "step": 37350 + }, + { + "epoch": 0.0754695637067353, + "grad_norm": 326.9264831542969, + "learning_rate": 7.472000000000001e-06, + "loss": 19.6958, + "step": 37360 + }, + { + "epoch": 0.0754897643394191, + "grad_norm": 203.56382751464844, + "learning_rate": 7.474e-06, + "loss": 20.1575, + "step": 37370 + }, + { + "epoch": 0.07550996497210292, + "grad_norm": 374.2593994140625, + "learning_rate": 7.476000000000001e-06, + "loss": 19.7323, + "step": 37380 + }, + { + "epoch": 0.07553016560478674, + "grad_norm": 535.1461181640625, + "learning_rate": 7.478000000000001e-06, + "loss": 31.1814, + "step": 37390 + }, + { + "epoch": 0.07555036623747055, + "grad_norm": 367.5732116699219, + "learning_rate": 7.48e-06, + "loss": 24.6602, + "step": 37400 + }, + { + "epoch": 0.07557056687015437, + "grad_norm": 663.7953491210938, + "learning_rate": 7.4820000000000005e-06, + "loss": 33.5646, + "step": 37410 + }, + { + "epoch": 0.07559076750283819, + "grad_norm": 411.58148193359375, + "learning_rate": 7.484e-06, + "loss": 27.3003, + "step": 37420 + }, + { + "epoch": 0.07561096813552201, + "grad_norm": 427.3364562988281, + "learning_rate": 7.486000000000001e-06, + "loss": 29.1163, + "step": 37430 + }, + { + "epoch": 0.07563116876820582, + "grad_norm": 224.48916625976562, + "learning_rate": 7.488000000000001e-06, + "loss": 36.7709, + "step": 37440 + }, + { + "epoch": 0.07565136940088964, + "grad_norm": 288.510986328125, + "learning_rate": 7.49e-06, + "loss": 35.8084, + "step": 37450 + }, + { + "epoch": 0.07567157003357346, + "grad_norm": 149.86940002441406, + "learning_rate": 7.4920000000000004e-06, + "loss": 24.0802, + "step": 37460 + }, + { + "epoch": 0.07569177066625726, + "grad_norm": 169.2025146484375, + "learning_rate": 7.494000000000001e-06, + "loss": 25.7512, + "step": 37470 + }, + { + "epoch": 0.07571197129894108, + "grad_norm": 202.79107666015625, + "learning_rate": 7.496000000000001e-06, + "loss": 24.6906, + "step": 37480 + }, + { + "epoch": 0.0757321719316249, + "grad_norm": 120.4333724975586, + "learning_rate": 7.498000000000001e-06, + "loss": 14.6184, + "step": 37490 + }, + { + "epoch": 0.07575237256430871, + "grad_norm": 324.7414855957031, + "learning_rate": 7.500000000000001e-06, + "loss": 32.847, + "step": 37500 + }, + { + "epoch": 0.07577257319699253, + "grad_norm": 238.66554260253906, + "learning_rate": 7.502e-06, + "loss": 20.7424, + "step": 37510 + }, + { + "epoch": 0.07579277382967635, + "grad_norm": 293.48760986328125, + "learning_rate": 7.5040000000000005e-06, + "loss": 22.57, + "step": 37520 + }, + { + "epoch": 0.07581297446236016, + "grad_norm": 440.31182861328125, + "learning_rate": 7.506000000000001e-06, + "loss": 29.815, + "step": 37530 + }, + { + "epoch": 0.07583317509504398, + "grad_norm": 137.69004821777344, + "learning_rate": 7.508000000000001e-06, + "loss": 20.7491, + "step": 37540 + }, + { + "epoch": 0.0758533757277278, + "grad_norm": 526.4642944335938, + "learning_rate": 7.510000000000001e-06, + "loss": 15.096, + "step": 37550 + }, + { + "epoch": 0.0758735763604116, + "grad_norm": 470.8855285644531, + "learning_rate": 7.512e-06, + "loss": 29.3815, + "step": 37560 + }, + { + "epoch": 0.07589377699309542, + "grad_norm": 161.48501586914062, + "learning_rate": 7.514e-06, + "loss": 20.7652, + "step": 37570 + }, + { + "epoch": 0.07591397762577924, + "grad_norm": 0.0, + "learning_rate": 7.516000000000001e-06, + "loss": 17.8332, + "step": 37580 + }, + { + "epoch": 0.07593417825846306, + "grad_norm": 603.0989379882812, + "learning_rate": 7.518000000000001e-06, + "loss": 27.5582, + "step": 37590 + }, + { + "epoch": 0.07595437889114687, + "grad_norm": 183.7471923828125, + "learning_rate": 7.520000000000001e-06, + "loss": 30.4387, + "step": 37600 + }, + { + "epoch": 0.07597457952383069, + "grad_norm": 195.45645141601562, + "learning_rate": 7.522e-06, + "loss": 52.3235, + "step": 37610 + }, + { + "epoch": 0.07599478015651451, + "grad_norm": 438.6357116699219, + "learning_rate": 7.524e-06, + "loss": 32.2656, + "step": 37620 + }, + { + "epoch": 0.07601498078919831, + "grad_norm": 324.342529296875, + "learning_rate": 7.526000000000001e-06, + "loss": 13.2064, + "step": 37630 + }, + { + "epoch": 0.07603518142188213, + "grad_norm": 404.1524963378906, + "learning_rate": 7.528000000000001e-06, + "loss": 35.1141, + "step": 37640 + }, + { + "epoch": 0.07605538205456595, + "grad_norm": 245.86949157714844, + "learning_rate": 7.530000000000001e-06, + "loss": 29.8367, + "step": 37650 + }, + { + "epoch": 0.07607558268724976, + "grad_norm": 205.048828125, + "learning_rate": 7.532e-06, + "loss": 35.5854, + "step": 37660 + }, + { + "epoch": 0.07609578331993358, + "grad_norm": 289.99774169921875, + "learning_rate": 7.534e-06, + "loss": 30.7862, + "step": 37670 + }, + { + "epoch": 0.0761159839526174, + "grad_norm": 216.33566284179688, + "learning_rate": 7.536000000000001e-06, + "loss": 11.94, + "step": 37680 + }, + { + "epoch": 0.0761361845853012, + "grad_norm": 475.70025634765625, + "learning_rate": 7.5380000000000005e-06, + "loss": 30.3343, + "step": 37690 + }, + { + "epoch": 0.07615638521798503, + "grad_norm": 100.22981262207031, + "learning_rate": 7.540000000000001e-06, + "loss": 29.544, + "step": 37700 + }, + { + "epoch": 0.07617658585066885, + "grad_norm": 428.4676818847656, + "learning_rate": 7.542000000000001e-06, + "loss": 35.3024, + "step": 37710 + }, + { + "epoch": 0.07619678648335265, + "grad_norm": 449.5159606933594, + "learning_rate": 7.544e-06, + "loss": 29.6146, + "step": 37720 + }, + { + "epoch": 0.07621698711603647, + "grad_norm": 399.134033203125, + "learning_rate": 7.546000000000001e-06, + "loss": 27.0095, + "step": 37730 + }, + { + "epoch": 0.07623718774872029, + "grad_norm": 325.0783996582031, + "learning_rate": 7.548000000000001e-06, + "loss": 29.3766, + "step": 37740 + }, + { + "epoch": 0.07625738838140411, + "grad_norm": 131.785400390625, + "learning_rate": 7.5500000000000006e-06, + "loss": 27.9222, + "step": 37750 + }, + { + "epoch": 0.07627758901408792, + "grad_norm": 279.1617736816406, + "learning_rate": 7.552000000000001e-06, + "loss": 23.7648, + "step": 37760 + }, + { + "epoch": 0.07629778964677174, + "grad_norm": 256.0752258300781, + "learning_rate": 7.554e-06, + "loss": 28.1566, + "step": 37770 + }, + { + "epoch": 0.07631799027945556, + "grad_norm": 584.557861328125, + "learning_rate": 7.556000000000001e-06, + "loss": 26.8847, + "step": 37780 + }, + { + "epoch": 0.07633819091213936, + "grad_norm": 1256.2279052734375, + "learning_rate": 7.558000000000001e-06, + "loss": 32.9997, + "step": 37790 + }, + { + "epoch": 0.07635839154482318, + "grad_norm": 201.48388671875, + "learning_rate": 7.5600000000000005e-06, + "loss": 21.9864, + "step": 37800 + }, + { + "epoch": 0.076378592177507, + "grad_norm": 55.078330993652344, + "learning_rate": 7.562000000000001e-06, + "loss": 26.4167, + "step": 37810 + }, + { + "epoch": 0.07639879281019081, + "grad_norm": 175.41482543945312, + "learning_rate": 7.564e-06, + "loss": 29.6828, + "step": 37820 + }, + { + "epoch": 0.07641899344287463, + "grad_norm": 230.7091522216797, + "learning_rate": 7.566000000000001e-06, + "loss": 43.724, + "step": 37830 + }, + { + "epoch": 0.07643919407555845, + "grad_norm": 123.84848022460938, + "learning_rate": 7.568000000000001e-06, + "loss": 51.0016, + "step": 37840 + }, + { + "epoch": 0.07645939470824226, + "grad_norm": 361.093017578125, + "learning_rate": 7.57e-06, + "loss": 21.1311, + "step": 37850 + }, + { + "epoch": 0.07647959534092608, + "grad_norm": 568.5448608398438, + "learning_rate": 7.5720000000000005e-06, + "loss": 25.8015, + "step": 37860 + }, + { + "epoch": 0.0764997959736099, + "grad_norm": 303.17047119140625, + "learning_rate": 7.574e-06, + "loss": 12.6588, + "step": 37870 + }, + { + "epoch": 0.0765199966062937, + "grad_norm": 362.2989196777344, + "learning_rate": 7.576000000000001e-06, + "loss": 21.2263, + "step": 37880 + }, + { + "epoch": 0.07654019723897752, + "grad_norm": 1215.2640380859375, + "learning_rate": 7.578000000000001e-06, + "loss": 30.4012, + "step": 37890 + }, + { + "epoch": 0.07656039787166134, + "grad_norm": 220.96456909179688, + "learning_rate": 7.58e-06, + "loss": 25.9396, + "step": 37900 + }, + { + "epoch": 0.07658059850434516, + "grad_norm": 170.63856506347656, + "learning_rate": 7.582e-06, + "loss": 24.1375, + "step": 37910 + }, + { + "epoch": 0.07660079913702897, + "grad_norm": 239.08563232421875, + "learning_rate": 7.5840000000000006e-06, + "loss": 32.5399, + "step": 37920 + }, + { + "epoch": 0.07662099976971279, + "grad_norm": 218.6236572265625, + "learning_rate": 7.586000000000001e-06, + "loss": 25.7304, + "step": 37930 + }, + { + "epoch": 0.07664120040239661, + "grad_norm": 143.8291473388672, + "learning_rate": 7.588000000000001e-06, + "loss": 17.6976, + "step": 37940 + }, + { + "epoch": 0.07666140103508041, + "grad_norm": 518.2284545898438, + "learning_rate": 7.590000000000001e-06, + "loss": 32.8466, + "step": 37950 + }, + { + "epoch": 0.07668160166776423, + "grad_norm": 638.3121337890625, + "learning_rate": 7.592e-06, + "loss": 28.6176, + "step": 37960 + }, + { + "epoch": 0.07670180230044805, + "grad_norm": 358.8909606933594, + "learning_rate": 7.5940000000000005e-06, + "loss": 17.4063, + "step": 37970 + }, + { + "epoch": 0.07672200293313186, + "grad_norm": 872.4479370117188, + "learning_rate": 7.5960000000000015e-06, + "loss": 41.7478, + "step": 37980 + }, + { + "epoch": 0.07674220356581568, + "grad_norm": 452.6264953613281, + "learning_rate": 7.598000000000001e-06, + "loss": 51.4937, + "step": 37990 + }, + { + "epoch": 0.0767624041984995, + "grad_norm": 527.2770385742188, + "learning_rate": 7.600000000000001e-06, + "loss": 45.5283, + "step": 38000 + }, + { + "epoch": 0.07678260483118331, + "grad_norm": 215.32249450683594, + "learning_rate": 7.602e-06, + "loss": 13.5002, + "step": 38010 + }, + { + "epoch": 0.07680280546386713, + "grad_norm": 340.856689453125, + "learning_rate": 7.604e-06, + "loss": 23.8508, + "step": 38020 + }, + { + "epoch": 0.07682300609655095, + "grad_norm": 341.5080261230469, + "learning_rate": 7.606000000000001e-06, + "loss": 24.837, + "step": 38030 + }, + { + "epoch": 0.07684320672923475, + "grad_norm": 315.8214111328125, + "learning_rate": 7.608000000000001e-06, + "loss": 26.5996, + "step": 38040 + }, + { + "epoch": 0.07686340736191857, + "grad_norm": 34.659786224365234, + "learning_rate": 7.610000000000001e-06, + "loss": 16.2948, + "step": 38050 + }, + { + "epoch": 0.0768836079946024, + "grad_norm": 385.8763732910156, + "learning_rate": 7.612e-06, + "loss": 30.8735, + "step": 38060 + }, + { + "epoch": 0.07690380862728621, + "grad_norm": 216.09523010253906, + "learning_rate": 7.614e-06, + "loss": 36.05, + "step": 38070 + }, + { + "epoch": 0.07692400925997002, + "grad_norm": 476.50372314453125, + "learning_rate": 7.616000000000001e-06, + "loss": 24.4925, + "step": 38080 + }, + { + "epoch": 0.07694420989265384, + "grad_norm": 422.1429138183594, + "learning_rate": 7.618000000000001e-06, + "loss": 38.6426, + "step": 38090 + }, + { + "epoch": 0.07696441052533766, + "grad_norm": 453.0693664550781, + "learning_rate": 7.620000000000001e-06, + "loss": 29.0683, + "step": 38100 + }, + { + "epoch": 0.07698461115802147, + "grad_norm": 96.81813049316406, + "learning_rate": 7.622000000000001e-06, + "loss": 55.061, + "step": 38110 + }, + { + "epoch": 0.07700481179070529, + "grad_norm": 230.05735778808594, + "learning_rate": 7.624e-06, + "loss": 31.8604, + "step": 38120 + }, + { + "epoch": 0.0770250124233891, + "grad_norm": 345.4097595214844, + "learning_rate": 7.626e-06, + "loss": 21.2156, + "step": 38130 + }, + { + "epoch": 0.07704521305607291, + "grad_norm": 425.7206115722656, + "learning_rate": 7.628000000000001e-06, + "loss": 26.9501, + "step": 38140 + }, + { + "epoch": 0.07706541368875673, + "grad_norm": 570.3914794921875, + "learning_rate": 7.630000000000001e-06, + "loss": 27.5759, + "step": 38150 + }, + { + "epoch": 0.07708561432144055, + "grad_norm": 61.713958740234375, + "learning_rate": 7.632e-06, + "loss": 23.2355, + "step": 38160 + }, + { + "epoch": 0.07710581495412436, + "grad_norm": 384.5368347167969, + "learning_rate": 7.634e-06, + "loss": 32.9063, + "step": 38170 + }, + { + "epoch": 0.07712601558680818, + "grad_norm": 433.4608459472656, + "learning_rate": 7.636e-06, + "loss": 24.4792, + "step": 38180 + }, + { + "epoch": 0.077146216219492, + "grad_norm": 223.1848907470703, + "learning_rate": 7.638e-06, + "loss": 20.5868, + "step": 38190 + }, + { + "epoch": 0.0771664168521758, + "grad_norm": 408.47845458984375, + "learning_rate": 7.640000000000001e-06, + "loss": 34.8088, + "step": 38200 + }, + { + "epoch": 0.07718661748485962, + "grad_norm": 336.5214538574219, + "learning_rate": 7.642e-06, + "loss": 28.309, + "step": 38210 + }, + { + "epoch": 0.07720681811754344, + "grad_norm": 384.3644104003906, + "learning_rate": 7.644e-06, + "loss": 21.7772, + "step": 38220 + }, + { + "epoch": 0.07722701875022726, + "grad_norm": 416.7187194824219, + "learning_rate": 7.646e-06, + "loss": 19.0977, + "step": 38230 + }, + { + "epoch": 0.07724721938291107, + "grad_norm": 214.350341796875, + "learning_rate": 7.648e-06, + "loss": 18.651, + "step": 38240 + }, + { + "epoch": 0.07726742001559489, + "grad_norm": 730.2731323242188, + "learning_rate": 7.650000000000001e-06, + "loss": 20.8267, + "step": 38250 + }, + { + "epoch": 0.07728762064827871, + "grad_norm": 273.54486083984375, + "learning_rate": 7.652e-06, + "loss": 43.3072, + "step": 38260 + }, + { + "epoch": 0.07730782128096252, + "grad_norm": 216.2020263671875, + "learning_rate": 7.654e-06, + "loss": 18.4144, + "step": 38270 + }, + { + "epoch": 0.07732802191364634, + "grad_norm": 244.2738494873047, + "learning_rate": 7.656000000000001e-06, + "loss": 16.5595, + "step": 38280 + }, + { + "epoch": 0.07734822254633016, + "grad_norm": 315.9172668457031, + "learning_rate": 7.658e-06, + "loss": 14.2322, + "step": 38290 + }, + { + "epoch": 0.07736842317901396, + "grad_norm": 353.1627197265625, + "learning_rate": 7.660000000000001e-06, + "loss": 19.8115, + "step": 38300 + }, + { + "epoch": 0.07738862381169778, + "grad_norm": 112.1511459350586, + "learning_rate": 7.662e-06, + "loss": 13.8515, + "step": 38310 + }, + { + "epoch": 0.0774088244443816, + "grad_norm": 296.2076416015625, + "learning_rate": 7.664e-06, + "loss": 21.1189, + "step": 38320 + }, + { + "epoch": 0.07742902507706541, + "grad_norm": 126.26333618164062, + "learning_rate": 7.666e-06, + "loss": 17.0716, + "step": 38330 + }, + { + "epoch": 0.07744922570974923, + "grad_norm": 345.6390686035156, + "learning_rate": 7.668000000000002e-06, + "loss": 15.5117, + "step": 38340 + }, + { + "epoch": 0.07746942634243305, + "grad_norm": 248.05418395996094, + "learning_rate": 7.670000000000001e-06, + "loss": 28.3819, + "step": 38350 + }, + { + "epoch": 0.07748962697511685, + "grad_norm": 560.028076171875, + "learning_rate": 7.672e-06, + "loss": 24.5581, + "step": 38360 + }, + { + "epoch": 0.07750982760780067, + "grad_norm": 2880.149169921875, + "learning_rate": 7.674e-06, + "loss": 39.5029, + "step": 38370 + }, + { + "epoch": 0.0775300282404845, + "grad_norm": 340.9888000488281, + "learning_rate": 7.676e-06, + "loss": 37.1128, + "step": 38380 + }, + { + "epoch": 0.07755022887316831, + "grad_norm": 820.3145751953125, + "learning_rate": 7.678000000000002e-06, + "loss": 23.1238, + "step": 38390 + }, + { + "epoch": 0.07757042950585212, + "grad_norm": 299.7330627441406, + "learning_rate": 7.680000000000001e-06, + "loss": 21.5669, + "step": 38400 + }, + { + "epoch": 0.07759063013853594, + "grad_norm": 537.774658203125, + "learning_rate": 7.682e-06, + "loss": 38.3354, + "step": 38410 + }, + { + "epoch": 0.07761083077121976, + "grad_norm": 205.1475830078125, + "learning_rate": 7.684e-06, + "loss": 26.5034, + "step": 38420 + }, + { + "epoch": 0.07763103140390357, + "grad_norm": 396.5845947265625, + "learning_rate": 7.686e-06, + "loss": 42.9647, + "step": 38430 + }, + { + "epoch": 0.07765123203658739, + "grad_norm": 157.94041442871094, + "learning_rate": 7.688000000000002e-06, + "loss": 34.7968, + "step": 38440 + }, + { + "epoch": 0.0776714326692712, + "grad_norm": 305.363037109375, + "learning_rate": 7.690000000000001e-06, + "loss": 30.2723, + "step": 38450 + }, + { + "epoch": 0.07769163330195501, + "grad_norm": 287.2494812011719, + "learning_rate": 7.692e-06, + "loss": 21.682, + "step": 38460 + }, + { + "epoch": 0.07771183393463883, + "grad_norm": 111.05783081054688, + "learning_rate": 7.694e-06, + "loss": 46.0671, + "step": 38470 + }, + { + "epoch": 0.07773203456732265, + "grad_norm": 459.8643798828125, + "learning_rate": 7.696e-06, + "loss": 34.0809, + "step": 38480 + }, + { + "epoch": 0.07775223520000646, + "grad_norm": 488.703857421875, + "learning_rate": 7.698000000000002e-06, + "loss": 29.491, + "step": 38490 + }, + { + "epoch": 0.07777243583269028, + "grad_norm": 566.2803955078125, + "learning_rate": 7.7e-06, + "loss": 22.0141, + "step": 38500 + }, + { + "epoch": 0.0777926364653741, + "grad_norm": 668.6922607421875, + "learning_rate": 7.702e-06, + "loss": 29.4514, + "step": 38510 + }, + { + "epoch": 0.0778128370980579, + "grad_norm": 255.03981018066406, + "learning_rate": 7.704000000000001e-06, + "loss": 37.2635, + "step": 38520 + }, + { + "epoch": 0.07783303773074172, + "grad_norm": 656.078857421875, + "learning_rate": 7.706e-06, + "loss": 24.1617, + "step": 38530 + }, + { + "epoch": 0.07785323836342554, + "grad_norm": 506.50732421875, + "learning_rate": 7.708000000000001e-06, + "loss": 28.9566, + "step": 38540 + }, + { + "epoch": 0.07787343899610936, + "grad_norm": 86.63217163085938, + "learning_rate": 7.71e-06, + "loss": 29.1458, + "step": 38550 + }, + { + "epoch": 0.07789363962879317, + "grad_norm": 201.97003173828125, + "learning_rate": 7.712e-06, + "loss": 10.9031, + "step": 38560 + }, + { + "epoch": 0.07791384026147699, + "grad_norm": 104.5938491821289, + "learning_rate": 7.714000000000001e-06, + "loss": 17.4683, + "step": 38570 + }, + { + "epoch": 0.07793404089416081, + "grad_norm": 152.68423461914062, + "learning_rate": 7.716e-06, + "loss": 12.3194, + "step": 38580 + }, + { + "epoch": 0.07795424152684462, + "grad_norm": 294.45306396484375, + "learning_rate": 7.718000000000001e-06, + "loss": 39.5909, + "step": 38590 + }, + { + "epoch": 0.07797444215952844, + "grad_norm": 63.02577590942383, + "learning_rate": 7.72e-06, + "loss": 20.2548, + "step": 38600 + }, + { + "epoch": 0.07799464279221226, + "grad_norm": 229.97557067871094, + "learning_rate": 7.722e-06, + "loss": 21.1741, + "step": 38610 + }, + { + "epoch": 0.07801484342489606, + "grad_norm": 218.60293579101562, + "learning_rate": 7.724000000000001e-06, + "loss": 22.0307, + "step": 38620 + }, + { + "epoch": 0.07803504405757988, + "grad_norm": 434.71160888671875, + "learning_rate": 7.726e-06, + "loss": 37.2633, + "step": 38630 + }, + { + "epoch": 0.0780552446902637, + "grad_norm": 665.3933715820312, + "learning_rate": 7.728000000000001e-06, + "loss": 63.3594, + "step": 38640 + }, + { + "epoch": 0.07807544532294751, + "grad_norm": 292.5657043457031, + "learning_rate": 7.73e-06, + "loss": 28.5122, + "step": 38650 + }, + { + "epoch": 0.07809564595563133, + "grad_norm": 435.3643798828125, + "learning_rate": 7.732e-06, + "loss": 41.8224, + "step": 38660 + }, + { + "epoch": 0.07811584658831515, + "grad_norm": 647.57861328125, + "learning_rate": 7.734e-06, + "loss": 30.0907, + "step": 38670 + }, + { + "epoch": 0.07813604722099896, + "grad_norm": 548.1163330078125, + "learning_rate": 7.736e-06, + "loss": 36.9682, + "step": 38680 + }, + { + "epoch": 0.07815624785368278, + "grad_norm": 214.9593505859375, + "learning_rate": 7.738000000000001e-06, + "loss": 24.2031, + "step": 38690 + }, + { + "epoch": 0.0781764484863666, + "grad_norm": 192.67250061035156, + "learning_rate": 7.74e-06, + "loss": 24.6837, + "step": 38700 + }, + { + "epoch": 0.07819664911905042, + "grad_norm": 467.13983154296875, + "learning_rate": 7.742000000000001e-06, + "loss": 30.5346, + "step": 38710 + }, + { + "epoch": 0.07821684975173422, + "grad_norm": 390.44482421875, + "learning_rate": 7.744e-06, + "loss": 43.1372, + "step": 38720 + }, + { + "epoch": 0.07823705038441804, + "grad_norm": 137.72560119628906, + "learning_rate": 7.746e-06, + "loss": 27.8986, + "step": 38730 + }, + { + "epoch": 0.07825725101710186, + "grad_norm": 475.2204895019531, + "learning_rate": 7.748000000000001e-06, + "loss": 23.493, + "step": 38740 + }, + { + "epoch": 0.07827745164978567, + "grad_norm": 1002.3382568359375, + "learning_rate": 7.75e-06, + "loss": 38.5035, + "step": 38750 + }, + { + "epoch": 0.07829765228246949, + "grad_norm": 866.871826171875, + "learning_rate": 7.752000000000001e-06, + "loss": 40.3604, + "step": 38760 + }, + { + "epoch": 0.07831785291515331, + "grad_norm": 209.6705780029297, + "learning_rate": 7.754e-06, + "loss": 29.3788, + "step": 38770 + }, + { + "epoch": 0.07833805354783711, + "grad_norm": 113.38577270507812, + "learning_rate": 7.756e-06, + "loss": 41.4995, + "step": 38780 + }, + { + "epoch": 0.07835825418052093, + "grad_norm": 0.0, + "learning_rate": 7.758000000000001e-06, + "loss": 23.5417, + "step": 38790 + }, + { + "epoch": 0.07837845481320475, + "grad_norm": 854.087890625, + "learning_rate": 7.76e-06, + "loss": 48.5038, + "step": 38800 + }, + { + "epoch": 0.07839865544588856, + "grad_norm": 98.32456970214844, + "learning_rate": 7.762000000000001e-06, + "loss": 28.5804, + "step": 38810 + }, + { + "epoch": 0.07841885607857238, + "grad_norm": 487.97796630859375, + "learning_rate": 7.764e-06, + "loss": 37.1735, + "step": 38820 + }, + { + "epoch": 0.0784390567112562, + "grad_norm": 874.7785034179688, + "learning_rate": 7.766e-06, + "loss": 45.4984, + "step": 38830 + }, + { + "epoch": 0.07845925734394, + "grad_norm": 306.357666015625, + "learning_rate": 7.768e-06, + "loss": 19.5471, + "step": 38840 + }, + { + "epoch": 0.07847945797662383, + "grad_norm": 203.58636474609375, + "learning_rate": 7.77e-06, + "loss": 19.9997, + "step": 38850 + }, + { + "epoch": 0.07849965860930765, + "grad_norm": 331.6130065917969, + "learning_rate": 7.772000000000001e-06, + "loss": 43.4086, + "step": 38860 + }, + { + "epoch": 0.07851985924199147, + "grad_norm": 602.2072143554688, + "learning_rate": 7.774e-06, + "loss": 32.2504, + "step": 38870 + }, + { + "epoch": 0.07854005987467527, + "grad_norm": 225.1083526611328, + "learning_rate": 7.776e-06, + "loss": 17.0911, + "step": 38880 + }, + { + "epoch": 0.07856026050735909, + "grad_norm": 251.9822998046875, + "learning_rate": 7.778e-06, + "loss": 20.4462, + "step": 38890 + }, + { + "epoch": 0.07858046114004291, + "grad_norm": 0.0, + "learning_rate": 7.78e-06, + "loss": 26.7651, + "step": 38900 + }, + { + "epoch": 0.07860066177272672, + "grad_norm": 525.1461791992188, + "learning_rate": 7.782000000000001e-06, + "loss": 28.6109, + "step": 38910 + }, + { + "epoch": 0.07862086240541054, + "grad_norm": 77.64105987548828, + "learning_rate": 7.784e-06, + "loss": 20.1857, + "step": 38920 + }, + { + "epoch": 0.07864106303809436, + "grad_norm": 324.5278015136719, + "learning_rate": 7.786e-06, + "loss": 22.1805, + "step": 38930 + }, + { + "epoch": 0.07866126367077816, + "grad_norm": 336.5326843261719, + "learning_rate": 7.788e-06, + "loss": 27.8223, + "step": 38940 + }, + { + "epoch": 0.07868146430346198, + "grad_norm": 565.1139526367188, + "learning_rate": 7.790000000000002e-06, + "loss": 23.0212, + "step": 38950 + }, + { + "epoch": 0.0787016649361458, + "grad_norm": 656.93408203125, + "learning_rate": 7.792000000000001e-06, + "loss": 15.9798, + "step": 38960 + }, + { + "epoch": 0.07872186556882961, + "grad_norm": 213.3914031982422, + "learning_rate": 7.794e-06, + "loss": 41.1962, + "step": 38970 + }, + { + "epoch": 0.07874206620151343, + "grad_norm": 195.66822814941406, + "learning_rate": 7.796e-06, + "loss": 45.6488, + "step": 38980 + }, + { + "epoch": 0.07876226683419725, + "grad_norm": 699.197021484375, + "learning_rate": 7.798e-06, + "loss": 32.3391, + "step": 38990 + }, + { + "epoch": 0.07878246746688106, + "grad_norm": 186.45999145507812, + "learning_rate": 7.800000000000002e-06, + "loss": 27.1626, + "step": 39000 + }, + { + "epoch": 0.07880266809956488, + "grad_norm": 824.193359375, + "learning_rate": 7.802000000000001e-06, + "loss": 31.3804, + "step": 39010 + }, + { + "epoch": 0.0788228687322487, + "grad_norm": 266.989013671875, + "learning_rate": 7.804e-06, + "loss": 14.4106, + "step": 39020 + }, + { + "epoch": 0.07884306936493252, + "grad_norm": 406.9546813964844, + "learning_rate": 7.806e-06, + "loss": 25.4219, + "step": 39030 + }, + { + "epoch": 0.07886326999761632, + "grad_norm": 454.76141357421875, + "learning_rate": 7.808e-06, + "loss": 24.3998, + "step": 39040 + }, + { + "epoch": 0.07888347063030014, + "grad_norm": 474.2530517578125, + "learning_rate": 7.810000000000001e-06, + "loss": 23.5222, + "step": 39050 + }, + { + "epoch": 0.07890367126298396, + "grad_norm": 537.8236083984375, + "learning_rate": 7.812e-06, + "loss": 36.1553, + "step": 39060 + }, + { + "epoch": 0.07892387189566777, + "grad_norm": 575.7525024414062, + "learning_rate": 7.814e-06, + "loss": 18.1287, + "step": 39070 + }, + { + "epoch": 0.07894407252835159, + "grad_norm": 583.9301147460938, + "learning_rate": 7.816000000000001e-06, + "loss": 37.095, + "step": 39080 + }, + { + "epoch": 0.07896427316103541, + "grad_norm": 410.6824035644531, + "learning_rate": 7.818e-06, + "loss": 25.8169, + "step": 39090 + }, + { + "epoch": 0.07898447379371921, + "grad_norm": 490.35687255859375, + "learning_rate": 7.820000000000001e-06, + "loss": 28.4401, + "step": 39100 + }, + { + "epoch": 0.07900467442640303, + "grad_norm": 13.36561393737793, + "learning_rate": 7.822e-06, + "loss": 37.471, + "step": 39110 + }, + { + "epoch": 0.07902487505908685, + "grad_norm": 153.46160888671875, + "learning_rate": 7.824e-06, + "loss": 26.377, + "step": 39120 + }, + { + "epoch": 0.07904507569177066, + "grad_norm": 327.67999267578125, + "learning_rate": 7.826000000000001e-06, + "loss": 23.6518, + "step": 39130 + }, + { + "epoch": 0.07906527632445448, + "grad_norm": 116.82891845703125, + "learning_rate": 7.828000000000002e-06, + "loss": 18.7044, + "step": 39140 + }, + { + "epoch": 0.0790854769571383, + "grad_norm": 561.8224487304688, + "learning_rate": 7.830000000000001e-06, + "loss": 24.7607, + "step": 39150 + }, + { + "epoch": 0.07910567758982211, + "grad_norm": 250.5322723388672, + "learning_rate": 7.832e-06, + "loss": 17.7831, + "step": 39160 + }, + { + "epoch": 0.07912587822250593, + "grad_norm": 516.6336669921875, + "learning_rate": 7.834e-06, + "loss": 19.4361, + "step": 39170 + }, + { + "epoch": 0.07914607885518975, + "grad_norm": 644.5774536132812, + "learning_rate": 7.836000000000001e-06, + "loss": 17.2036, + "step": 39180 + }, + { + "epoch": 0.07916627948787357, + "grad_norm": 316.52069091796875, + "learning_rate": 7.838000000000002e-06, + "loss": 27.8679, + "step": 39190 + }, + { + "epoch": 0.07918648012055737, + "grad_norm": 269.5502624511719, + "learning_rate": 7.840000000000001e-06, + "loss": 48.5693, + "step": 39200 + }, + { + "epoch": 0.0792066807532412, + "grad_norm": 311.39788818359375, + "learning_rate": 7.842e-06, + "loss": 19.5268, + "step": 39210 + }, + { + "epoch": 0.07922688138592501, + "grad_norm": 291.3498840332031, + "learning_rate": 7.844e-06, + "loss": 34.8294, + "step": 39220 + }, + { + "epoch": 0.07924708201860882, + "grad_norm": 137.64781188964844, + "learning_rate": 7.846e-06, + "loss": 17.3334, + "step": 39230 + }, + { + "epoch": 0.07926728265129264, + "grad_norm": 193.2944793701172, + "learning_rate": 7.848000000000002e-06, + "loss": 23.836, + "step": 39240 + }, + { + "epoch": 0.07928748328397646, + "grad_norm": 994.33984375, + "learning_rate": 7.850000000000001e-06, + "loss": 29.2834, + "step": 39250 + }, + { + "epoch": 0.07930768391666027, + "grad_norm": 223.15631103515625, + "learning_rate": 7.852e-06, + "loss": 24.6491, + "step": 39260 + }, + { + "epoch": 0.07932788454934409, + "grad_norm": 439.4519348144531, + "learning_rate": 7.854e-06, + "loss": 28.0021, + "step": 39270 + }, + { + "epoch": 0.0793480851820279, + "grad_norm": 693.3170776367188, + "learning_rate": 7.856e-06, + "loss": 32.0823, + "step": 39280 + }, + { + "epoch": 0.07936828581471171, + "grad_norm": 307.3612976074219, + "learning_rate": 7.858000000000002e-06, + "loss": 25.5835, + "step": 39290 + }, + { + "epoch": 0.07938848644739553, + "grad_norm": 332.3131408691406, + "learning_rate": 7.860000000000001e-06, + "loss": 22.3718, + "step": 39300 + }, + { + "epoch": 0.07940868708007935, + "grad_norm": 221.25428771972656, + "learning_rate": 7.862e-06, + "loss": 28.4674, + "step": 39310 + }, + { + "epoch": 0.07942888771276316, + "grad_norm": 165.73020935058594, + "learning_rate": 7.864000000000001e-06, + "loss": 23.2906, + "step": 39320 + }, + { + "epoch": 0.07944908834544698, + "grad_norm": 364.01385498046875, + "learning_rate": 7.866e-06, + "loss": 29.6909, + "step": 39330 + }, + { + "epoch": 0.0794692889781308, + "grad_norm": 484.53143310546875, + "learning_rate": 7.868000000000002e-06, + "loss": 28.3154, + "step": 39340 + }, + { + "epoch": 0.07948948961081462, + "grad_norm": 126.66886138916016, + "learning_rate": 7.870000000000001e-06, + "loss": 18.5973, + "step": 39350 + }, + { + "epoch": 0.07950969024349842, + "grad_norm": 253.3115234375, + "learning_rate": 7.872e-06, + "loss": 24.0437, + "step": 39360 + }, + { + "epoch": 0.07952989087618224, + "grad_norm": 1409.166748046875, + "learning_rate": 7.874000000000001e-06, + "loss": 49.3596, + "step": 39370 + }, + { + "epoch": 0.07955009150886606, + "grad_norm": 170.87071228027344, + "learning_rate": 7.876e-06, + "loss": 25.401, + "step": 39380 + }, + { + "epoch": 0.07957029214154987, + "grad_norm": 80.8373031616211, + "learning_rate": 7.878e-06, + "loss": 26.2307, + "step": 39390 + }, + { + "epoch": 0.07959049277423369, + "grad_norm": 157.31773376464844, + "learning_rate": 7.88e-06, + "loss": 14.1911, + "step": 39400 + }, + { + "epoch": 0.07961069340691751, + "grad_norm": 174.0682830810547, + "learning_rate": 7.882e-06, + "loss": 20.9031, + "step": 39410 + }, + { + "epoch": 0.07963089403960132, + "grad_norm": 192.36581420898438, + "learning_rate": 7.884000000000001e-06, + "loss": 23.4383, + "step": 39420 + }, + { + "epoch": 0.07965109467228514, + "grad_norm": 355.9923095703125, + "learning_rate": 7.886e-06, + "loss": 27.7255, + "step": 39430 + }, + { + "epoch": 0.07967129530496896, + "grad_norm": 444.078125, + "learning_rate": 7.888e-06, + "loss": 18.7206, + "step": 39440 + }, + { + "epoch": 0.07969149593765276, + "grad_norm": 421.5205078125, + "learning_rate": 7.89e-06, + "loss": 20.7926, + "step": 39450 + }, + { + "epoch": 0.07971169657033658, + "grad_norm": 483.5147399902344, + "learning_rate": 7.892e-06, + "loss": 37.6867, + "step": 39460 + }, + { + "epoch": 0.0797318972030204, + "grad_norm": 188.4382781982422, + "learning_rate": 7.894000000000001e-06, + "loss": 34.2945, + "step": 39470 + }, + { + "epoch": 0.07975209783570421, + "grad_norm": 433.1778564453125, + "learning_rate": 7.896e-06, + "loss": 12.7678, + "step": 39480 + }, + { + "epoch": 0.07977229846838803, + "grad_norm": 172.69244384765625, + "learning_rate": 7.898e-06, + "loss": 18.4869, + "step": 39490 + }, + { + "epoch": 0.07979249910107185, + "grad_norm": 365.7929992675781, + "learning_rate": 7.9e-06, + "loss": 20.3563, + "step": 39500 + }, + { + "epoch": 0.07981269973375565, + "grad_norm": 270.8347473144531, + "learning_rate": 7.902000000000002e-06, + "loss": 23.8935, + "step": 39510 + }, + { + "epoch": 0.07983290036643947, + "grad_norm": 178.36856079101562, + "learning_rate": 7.904000000000001e-06, + "loss": 33.3036, + "step": 39520 + }, + { + "epoch": 0.0798531009991233, + "grad_norm": 364.9798583984375, + "learning_rate": 7.906e-06, + "loss": 35.2624, + "step": 39530 + }, + { + "epoch": 0.07987330163180711, + "grad_norm": 453.27178955078125, + "learning_rate": 7.908e-06, + "loss": 26.2489, + "step": 39540 + }, + { + "epoch": 0.07989350226449092, + "grad_norm": 220.4715576171875, + "learning_rate": 7.91e-06, + "loss": 19.8085, + "step": 39550 + }, + { + "epoch": 0.07991370289717474, + "grad_norm": 264.56671142578125, + "learning_rate": 7.912000000000001e-06, + "loss": 18.68, + "step": 39560 + }, + { + "epoch": 0.07993390352985856, + "grad_norm": 223.5511932373047, + "learning_rate": 7.914e-06, + "loss": 18.9332, + "step": 39570 + }, + { + "epoch": 0.07995410416254237, + "grad_norm": 379.7696228027344, + "learning_rate": 7.916e-06, + "loss": 20.1655, + "step": 39580 + }, + { + "epoch": 0.07997430479522619, + "grad_norm": 433.83953857421875, + "learning_rate": 7.918e-06, + "loss": 18.233, + "step": 39590 + }, + { + "epoch": 0.07999450542791, + "grad_norm": 566.2931518554688, + "learning_rate": 7.92e-06, + "loss": 30.7419, + "step": 39600 + }, + { + "epoch": 0.08001470606059381, + "grad_norm": 138.607421875, + "learning_rate": 7.922000000000001e-06, + "loss": 22.5985, + "step": 39610 + }, + { + "epoch": 0.08003490669327763, + "grad_norm": 351.9343566894531, + "learning_rate": 7.924e-06, + "loss": 44.8302, + "step": 39620 + }, + { + "epoch": 0.08005510732596145, + "grad_norm": 298.3395080566406, + "learning_rate": 7.926e-06, + "loss": 14.4203, + "step": 39630 + }, + { + "epoch": 0.08007530795864526, + "grad_norm": 175.9684295654297, + "learning_rate": 7.928e-06, + "loss": 36.0825, + "step": 39640 + }, + { + "epoch": 0.08009550859132908, + "grad_norm": 512.1127319335938, + "learning_rate": 7.93e-06, + "loss": 30.4307, + "step": 39650 + }, + { + "epoch": 0.0801157092240129, + "grad_norm": 1372.822998046875, + "learning_rate": 7.932000000000001e-06, + "loss": 69.6502, + "step": 39660 + }, + { + "epoch": 0.0801359098566967, + "grad_norm": 298.2317810058594, + "learning_rate": 7.934e-06, + "loss": 33.3147, + "step": 39670 + }, + { + "epoch": 0.08015611048938052, + "grad_norm": 310.0917053222656, + "learning_rate": 7.936e-06, + "loss": 51.5298, + "step": 39680 + }, + { + "epoch": 0.08017631112206434, + "grad_norm": 45.98925018310547, + "learning_rate": 7.938000000000001e-06, + "loss": 34.7196, + "step": 39690 + }, + { + "epoch": 0.08019651175474816, + "grad_norm": 322.8241882324219, + "learning_rate": 7.94e-06, + "loss": 35.661, + "step": 39700 + }, + { + "epoch": 0.08021671238743197, + "grad_norm": 180.28321838378906, + "learning_rate": 7.942000000000001e-06, + "loss": 9.7107, + "step": 39710 + }, + { + "epoch": 0.08023691302011579, + "grad_norm": 339.8468017578125, + "learning_rate": 7.944e-06, + "loss": 22.9418, + "step": 39720 + }, + { + "epoch": 0.08025711365279961, + "grad_norm": 404.9114990234375, + "learning_rate": 7.946e-06, + "loss": 23.3855, + "step": 39730 + }, + { + "epoch": 0.08027731428548342, + "grad_norm": 406.2514953613281, + "learning_rate": 7.948e-06, + "loss": 18.2815, + "step": 39740 + }, + { + "epoch": 0.08029751491816724, + "grad_norm": 206.7250518798828, + "learning_rate": 7.950000000000002e-06, + "loss": 37.404, + "step": 39750 + }, + { + "epoch": 0.08031771555085106, + "grad_norm": 159.21121215820312, + "learning_rate": 7.952000000000001e-06, + "loss": 28.1668, + "step": 39760 + }, + { + "epoch": 0.08033791618353486, + "grad_norm": 114.62528228759766, + "learning_rate": 7.954e-06, + "loss": 26.7519, + "step": 39770 + }, + { + "epoch": 0.08035811681621868, + "grad_norm": 230.16160583496094, + "learning_rate": 7.956e-06, + "loss": 37.9638, + "step": 39780 + }, + { + "epoch": 0.0803783174489025, + "grad_norm": 213.52178955078125, + "learning_rate": 7.958e-06, + "loss": 39.4954, + "step": 39790 + }, + { + "epoch": 0.08039851808158631, + "grad_norm": 584.9029541015625, + "learning_rate": 7.960000000000002e-06, + "loss": 13.7502, + "step": 39800 + }, + { + "epoch": 0.08041871871427013, + "grad_norm": 290.5389099121094, + "learning_rate": 7.962000000000001e-06, + "loss": 39.4151, + "step": 39810 + }, + { + "epoch": 0.08043891934695395, + "grad_norm": 558.5338134765625, + "learning_rate": 7.964e-06, + "loss": 58.5718, + "step": 39820 + }, + { + "epoch": 0.08045911997963776, + "grad_norm": 161.5792694091797, + "learning_rate": 7.966e-06, + "loss": 17.8117, + "step": 39830 + }, + { + "epoch": 0.08047932061232158, + "grad_norm": 362.5206604003906, + "learning_rate": 7.968e-06, + "loss": 20.2731, + "step": 39840 + }, + { + "epoch": 0.0804995212450054, + "grad_norm": 143.33853149414062, + "learning_rate": 7.970000000000002e-06, + "loss": 20.3367, + "step": 39850 + }, + { + "epoch": 0.08051972187768922, + "grad_norm": 542.078369140625, + "learning_rate": 7.972000000000001e-06, + "loss": 19.1295, + "step": 39860 + }, + { + "epoch": 0.08053992251037302, + "grad_norm": 435.0280456542969, + "learning_rate": 7.974e-06, + "loss": 23.3512, + "step": 39870 + }, + { + "epoch": 0.08056012314305684, + "grad_norm": 465.48614501953125, + "learning_rate": 7.976000000000001e-06, + "loss": 21.44, + "step": 39880 + }, + { + "epoch": 0.08058032377574066, + "grad_norm": 265.0390625, + "learning_rate": 7.978e-06, + "loss": 22.3309, + "step": 39890 + }, + { + "epoch": 0.08060052440842447, + "grad_norm": 332.6612243652344, + "learning_rate": 7.980000000000002e-06, + "loss": 24.4405, + "step": 39900 + }, + { + "epoch": 0.08062072504110829, + "grad_norm": 153.6615753173828, + "learning_rate": 7.982e-06, + "loss": 44.8212, + "step": 39910 + }, + { + "epoch": 0.08064092567379211, + "grad_norm": 295.36700439453125, + "learning_rate": 7.984e-06, + "loss": 16.5709, + "step": 39920 + }, + { + "epoch": 0.08066112630647591, + "grad_norm": 449.8714599609375, + "learning_rate": 7.986000000000001e-06, + "loss": 42.9904, + "step": 39930 + }, + { + "epoch": 0.08068132693915973, + "grad_norm": 280.0345153808594, + "learning_rate": 7.988e-06, + "loss": 44.0653, + "step": 39940 + }, + { + "epoch": 0.08070152757184355, + "grad_norm": 90.7134017944336, + "learning_rate": 7.990000000000001e-06, + "loss": 19.1288, + "step": 39950 + }, + { + "epoch": 0.08072172820452736, + "grad_norm": 610.0533447265625, + "learning_rate": 7.992e-06, + "loss": 33.2247, + "step": 39960 + }, + { + "epoch": 0.08074192883721118, + "grad_norm": 709.1533203125, + "learning_rate": 7.994e-06, + "loss": 19.8445, + "step": 39970 + }, + { + "epoch": 0.080762129469895, + "grad_norm": 237.41796875, + "learning_rate": 7.996000000000001e-06, + "loss": 21.7348, + "step": 39980 + }, + { + "epoch": 0.0807823301025788, + "grad_norm": 384.1241149902344, + "learning_rate": 7.998e-06, + "loss": 40.8574, + "step": 39990 + }, + { + "epoch": 0.08080253073526263, + "grad_norm": 416.15740966796875, + "learning_rate": 8.000000000000001e-06, + "loss": 13.9586, + "step": 40000 } ], "logging_steps": 10,